diff --git "a/experiment_config.json" "b/experiment_config.json" new file mode 100644--- /dev/null +++ "b/experiment_config.json" @@ -0,0 +1,333622 @@ +{ + "training_args": { + "output_dir": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/nlu_boolq_lora_v1", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": true, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 4, + "per_device_eval_batch_size": 8, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 4, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 5e-05, + "weight_decay": 0.0, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3, + "max_steps": -1, + "lr_scheduler_type": "linear", + "lr_scheduler_kwargs": {}, + "warmup_ratio": 0.0, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/nlu_boolq_lora_v1/runs/Sep10_02-34-17_gx07", + "logging_strategy": "steps", + "logging_first_step": false, + "logging_steps": 20, + "logging_nan_inf_filter": true, + "save_strategy": "epoch", + "save_steps": 500, + "save_total_limit": null, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "seed": 42, + "data_seed": null, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": false, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": [], + "dataloader_drop_last": false, + "eval_steps": 147, + "dataloader_num_workers": 0, + "dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/nlu_boolq_lora_v1", + "disable_tqdm": false, + "remove_unused_columns": true, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": null, + "greater_is_better": null, + "ignore_data_skip": false, + "fsdp": [], + "fsdp_min_num_params": 0, + "fsdp_config": { + "min_num_params": 0, + "xla": false, + "xla_fsdp_v2": false, + "xla_fsdp_grad_ckpt": false + }, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "split_batches": false, + "dispatch_batches": null, + "even_batches": true, + "use_seedable_sampler": true, + "non_blocking": false, + "gradient_accumulation_kwargs": null + }, + "deepspeed": null, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_token": "", + "hub_private_repo": null, + "hub_always_push": false, + "gradient_checkpointing": false, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": "", + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false + }, + "lora_config": { + "task_type": "CAUSAL_LM", + "peft_type": "LORA", + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.2-1B-Instruct", + "revision": null, + "inference_mode": false, + "r": 16, + "target_modules": [ + "q_proj", + "v_proj", + "gate_proj", + "down_proj", + "o_proj", + "up_proj", + "k_proj" + ], + "exclude_modules": null, + "lora_alpha": 16, + "lora_dropout": 0.1, + "fan_in_fan_out": false, + "bias": "none", + "use_rslora": true, + "modules_to_save": null, + "init_lora_weights": true, + "layers_to_transform": null, + "layers_pattern": null, + "rank_pattern": {}, + "alpha_pattern": {}, + "megatron_config": null, + "megatron_core": "megatron.core", + "trainable_token_indices": null, + "loftq_config": {}, + "eva_config": null, + "corda_config": null, + "use_dora": false, + "layer_replication": null, + "runtime_config": { + "ephemeral_gpu_offload": false + }, + "lora_bias": false + }, + "flops": { + "eval": 106689854968838400, + "train": 3.61888601646767e+16, + "total": 1.428787151335151e+17 + }, + "total_energy": 127.00658, + "logs": [ + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:34:45.888087", + "step": 0, + "epoch": 0 + }, + { + "type": "pplx", + "content": 54140675.446864516, + "timestamp": "2025-09-10 02:34:45.891761", + "step": 0, + "epoch": 0 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:45.995408", + "step": 0, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.6008338332176208, + "timestamp": "2025-09-10 02:34:45.997239", + "step": 1, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:34:46.064445", + "step": 1, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.5395371317863464, + "timestamp": "2025-09-10 02:34:46.066601", + "step": 2, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:46.118920", + "step": 2, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.547315239906311, + "timestamp": "2025-09-10 02:34:46.121082", + "step": 3, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:46.186525", + "step": 3, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.6588919758796692, + "timestamp": "2025-09-10 02:34:46.235931", + "step": 4, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:46.288306", + "step": 4, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.490021288394928, + "timestamp": "2025-09-10 02:34:46.290669", + "step": 5, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:34:46.360797", + "step": 5, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.41430097818374634, + "timestamp": "2025-09-10 02:34:46.362581", + "step": 6, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:46.414975", + "step": 6, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.3882477879524231, + "timestamp": "2025-09-10 02:34:46.417293", + "step": 7, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:46.482150", + "step": 7, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.5351694822311401, + "timestamp": "2025-09-10 02:34:46.488344", + "step": 8, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:46.559251", + "step": 8, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.16230842471122742, + "timestamp": "2025-09-10 02:34:46.561895", + "step": 9, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:34:46.614581", + "step": 9, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.12545782327651978, + "timestamp": "2025-09-10 02:34:46.617434", + "step": 10, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:46.669848", + "step": 10, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.12372326105833054, + "timestamp": "2025-09-10 02:34:46.672168", + "step": 11, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:46.725226", + "step": 11, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.14071746170520782, + "timestamp": "2025-09-10 02:34:46.730817", + "step": 12, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:46.790259", + "step": 12, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.10930095613002777, + "timestamp": "2025-09-10 02:34:46.792423", + "step": 13, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:34:46.860349", + "step": 13, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07601930946111679, + "timestamp": "2025-09-10 02:34:46.866920", + "step": 14, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:46.919533", + "step": 14, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07199852913618088, + "timestamp": "2025-09-10 02:34:46.921508", + "step": 15, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:34:46.973747", + "step": 15, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.11638077348470688, + "timestamp": "2025-09-10 02:34:46.979695", + "step": 16, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:34:47.041847", + "step": 16, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05031171813607216, + "timestamp": "2025-09-10 02:34:47.046089", + "step": 17, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:47.098775", + "step": 17, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0463133379817009, + "timestamp": "2025-09-10 02:34:47.100699", + "step": 18, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:47.152851", + "step": 18, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047386061400175095, + "timestamp": "2025-09-10 02:34:47.159013", + "step": 19, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:47.211476", + "step": 19, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.10854268074035645, + "timestamp": "2025-09-10 02:34:47.217047", + "step": 20, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:47.268622", + "step": 20, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027190817520022392, + "timestamp": "2025-09-10 02:34:47.274957", + "step": 21, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:34:47.327445", + "step": 21, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02770153246819973, + "timestamp": "2025-09-10 02:34:47.329262", + "step": 22, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:47.381654", + "step": 22, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029606414958834648, + "timestamp": "2025-09-10 02:34:47.383468", + "step": 23, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:47.436729", + "step": 23, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06987056881189346, + "timestamp": "2025-09-10 02:34:47.442180", + "step": 24, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:47.494008", + "step": 24, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023294363170862198, + "timestamp": "2025-09-10 02:34:47.496000", + "step": 25, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:47.548031", + "step": 25, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.09575532376766205, + "timestamp": "2025-09-10 02:34:47.549811", + "step": 26, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:47.602244", + "step": 26, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026402516290545464, + "timestamp": "2025-09-10 02:34:47.604203", + "step": 27, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:34:47.674084", + "step": 27, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025583965703845024, + "timestamp": "2025-09-10 02:34:47.680169", + "step": 28, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:47.735999", + "step": 28, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05738652125000954, + "timestamp": "2025-09-10 02:34:47.737773", + "step": 29, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:34:47.797646", + "step": 29, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0032462701201438904, + "timestamp": "2025-09-10 02:34:47.808149", + "step": 30, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:47.860369", + "step": 30, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.12516382336616516, + "timestamp": "2025-09-10 02:34:47.862381", + "step": 31, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:47.914798", + "step": 31, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06932564079761505, + "timestamp": "2025-09-10 02:34:47.920255", + "step": 32, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:34:47.971477", + "step": 32, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01569373533129692, + "timestamp": "2025-09-10 02:34:47.973062", + "step": 33, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:34:48.024914", + "step": 33, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02847396209836006, + "timestamp": "2025-09-10 02:34:48.027536", + "step": 34, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:48.079740", + "step": 34, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.042594462633132935, + "timestamp": "2025-09-10 02:34:48.085977", + "step": 35, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:34:48.138004", + "step": 35, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04337479919195175, + "timestamp": "2025-09-10 02:34:48.143371", + "step": 36, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:48.194982", + "step": 36, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010617688298225403, + "timestamp": "2025-09-10 02:34:48.197172", + "step": 37, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:34:48.262865", + "step": 37, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014554865658283234, + "timestamp": "2025-09-10 02:34:48.264854", + "step": 38, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:48.317418", + "step": 38, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029846573248505592, + "timestamp": "2025-09-10 02:34:48.319401", + "step": 39, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:48.373782", + "step": 39, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02213556505739689, + "timestamp": "2025-09-10 02:34:48.379453", + "step": 40, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:48.432402", + "step": 40, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03805265203118324, + "timestamp": "2025-09-10 02:34:48.434127", + "step": 41, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:48.487294", + "step": 41, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02557053044438362, + "timestamp": "2025-09-10 02:34:48.489207", + "step": 42, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:34:48.548893", + "step": 42, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02088429220020771, + "timestamp": "2025-09-10 02:34:48.559087", + "step": 43, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:48.612469", + "step": 43, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06046781316399574, + "timestamp": "2025-09-10 02:34:48.618350", + "step": 44, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:34:48.670210", + "step": 44, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021367188543081284, + "timestamp": "2025-09-10 02:34:48.671981", + "step": 45, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:48.724711", + "step": 45, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016253933310508728, + "timestamp": "2025-09-10 02:34:48.726799", + "step": 46, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:48.778932", + "step": 46, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011869534850120544, + "timestamp": "2025-09-10 02:34:48.785059", + "step": 47, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:48.837434", + "step": 47, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029137682169675827, + "timestamp": "2025-09-10 02:34:48.843350", + "step": 48, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:34:48.898093", + "step": 48, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038361966609954834, + "timestamp": "2025-09-10 02:34:48.902815", + "step": 49, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:34:48.955169", + "step": 49, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017897581681609154, + "timestamp": "2025-09-10 02:34:48.962992", + "step": 50, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:34:49.015537", + "step": 50, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03656654804944992, + "timestamp": "2025-09-10 02:34:49.017403", + "step": 51, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 928 + ], + "flops": 18560112737920.0 + }, + "timestamp": "2025-09-10 02:34:49.148836", + "step": 51, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02816540002822876, + "timestamp": "2025-09-10 02:34:49.175281", + "step": 52, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:49.227457", + "step": 52, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022283677011728287, + "timestamp": "2025-09-10 02:34:49.229211", + "step": 53, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:49.281169", + "step": 53, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01824142411351204, + "timestamp": "2025-09-10 02:34:49.287282", + "step": 54, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:49.339986", + "step": 54, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027409756556153297, + "timestamp": "2025-09-10 02:34:49.341897", + "step": 55, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:34:49.414029", + "step": 55, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018066495656967163, + "timestamp": "2025-09-10 02:34:49.426014", + "step": 56, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:34:49.477646", + "step": 56, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015896065160632133, + "timestamp": "2025-09-10 02:34:49.479600", + "step": 57, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:49.531706", + "step": 57, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032879505306482315, + "timestamp": "2025-09-10 02:34:49.533425", + "step": 58, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:49.585304", + "step": 58, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023104578256607056, + "timestamp": "2025-09-10 02:34:49.591499", + "step": 59, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:34:49.643922", + "step": 59, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028585180640220642, + "timestamp": "2025-09-10 02:34:49.649619", + "step": 60, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:34:49.701402", + "step": 60, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030385613441467285, + "timestamp": "2025-09-10 02:34:49.711307", + "step": 61, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:49.763354", + "step": 61, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015409083105623722, + "timestamp": "2025-09-10 02:34:49.765141", + "step": 62, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:34:49.816842", + "step": 62, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008990316651761532, + "timestamp": "2025-09-10 02:34:49.819669", + "step": 63, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:49.871586", + "step": 63, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027352871373295784, + "timestamp": "2025-09-10 02:34:49.877002", + "step": 64, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:34:49.942990", + "step": 64, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010400832630693913, + "timestamp": "2025-09-10 02:34:49.956471", + "step": 65, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:50.009540", + "step": 65, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02165437862277031, + "timestamp": "2025-09-10 02:34:50.011377", + "step": 66, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:34:50.063725", + "step": 66, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03493545949459076, + "timestamp": "2025-09-10 02:34:50.065750", + "step": 67, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:50.118296", + "step": 67, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018001478165388107, + "timestamp": "2025-09-10 02:34:50.124027", + "step": 68, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:34:50.175500", + "step": 68, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019673990085721016, + "timestamp": "2025-09-10 02:34:50.178349", + "step": 69, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:34:50.245906", + "step": 69, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01824493706226349, + "timestamp": "2025-09-10 02:34:50.258215", + "step": 70, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:50.311020", + "step": 70, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012586208060383797, + "timestamp": "2025-09-10 02:34:50.312940", + "step": 71, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:34:50.368248", + "step": 71, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004381082020699978, + "timestamp": "2025-09-10 02:34:50.377212", + "step": 72, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:50.428715", + "step": 72, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031274572014808655, + "timestamp": "2025-09-10 02:34:50.430455", + "step": 73, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:50.483000", + "step": 73, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0065897488966584206, + "timestamp": "2025-09-10 02:34:50.485267", + "step": 74, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:34:50.537626", + "step": 74, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03049614652991295, + "timestamp": "2025-09-10 02:34:50.545530", + "step": 75, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:34:50.611566", + "step": 75, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016821924597024918, + "timestamp": "2025-09-10 02:34:50.624379", + "step": 76, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:50.676079", + "step": 76, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024387311190366745, + "timestamp": "2025-09-10 02:34:50.677738", + "step": 77, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:34:50.729816", + "step": 77, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034385766834020615, + "timestamp": "2025-09-10 02:34:50.731588", + "step": 78, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:34:50.788552", + "step": 78, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025617972016334534, + "timestamp": "2025-09-10 02:34:50.798736", + "step": 79, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:34:50.864771", + "step": 79, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024418707937002182, + "timestamp": "2025-09-10 02:34:50.877538", + "step": 80, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:50.929714", + "step": 80, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01649116910994053, + "timestamp": "2025-09-10 02:34:50.931631", + "step": 81, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:34:50.984309", + "step": 81, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025838321074843407, + "timestamp": "2025-09-10 02:34:50.993752", + "step": 82, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:51.046019", + "step": 82, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022280815988779068, + "timestamp": "2025-09-10 02:34:51.052226", + "step": 83, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:51.111822", + "step": 83, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013495906256139278, + "timestamp": "2025-09-10 02:34:51.117327", + "step": 84, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:51.171830", + "step": 84, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023006485775113106, + "timestamp": "2025-09-10 02:34:51.173780", + "step": 85, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:34:51.233162", + "step": 85, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03280184417963028, + "timestamp": "2025-09-10 02:34:51.243689", + "step": 86, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:51.298413", + "step": 86, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013478524051606655, + "timestamp": "2025-09-10 02:34:51.300198", + "step": 87, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:51.353211", + "step": 87, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014801465906202793, + "timestamp": "2025-09-10 02:34:51.358968", + "step": 88, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 9920060287936.0 + }, + "timestamp": "2025-09-10 02:34:51.432966", + "step": 88, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028322791680693626, + "timestamp": "2025-09-10 02:34:51.448008", + "step": 89, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:51.501354", + "step": 89, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020225470885634422, + "timestamp": "2025-09-10 02:34:51.504548", + "step": 90, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:51.557316", + "step": 90, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016057122498750687, + "timestamp": "2025-09-10 02:34:51.559722", + "step": 91, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:34:51.617615", + "step": 91, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019442101940512657, + "timestamp": "2025-09-10 02:34:51.628625", + "step": 92, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:51.681629", + "step": 92, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027180684730410576, + "timestamp": "2025-09-10 02:34:51.687491", + "step": 93, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:51.742098", + "step": 93, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016686907038092613, + "timestamp": "2025-09-10 02:34:51.743935", + "step": 94, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:34:51.817839", + "step": 94, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014631020836532116, + "timestamp": "2025-09-10 02:34:51.831339", + "step": 95, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:51.884016", + "step": 95, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020649760961532593, + "timestamp": "2025-09-10 02:34:51.889349", + "step": 96, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:34:51.940557", + "step": 96, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03311503306031227, + "timestamp": "2025-09-10 02:34:51.943354", + "step": 97, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:51.996011", + "step": 97, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02003851905465126, + "timestamp": "2025-09-10 02:34:51.999153", + "step": 98, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:52.052283", + "step": 98, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01943128928542137, + "timestamp": "2025-09-10 02:34:52.054142", + "step": 99, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:52.107004", + "step": 99, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02221931517124176, + "timestamp": "2025-09-10 02:34:52.112823", + "step": 100, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:34:52.164486", + "step": 100, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031217534095048904, + "timestamp": "2025-09-10 02:34:52.166360", + "step": 101, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:52.218710", + "step": 101, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02912614308297634, + "timestamp": "2025-09-10 02:34:52.220332", + "step": 102, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:34:52.281198", + "step": 102, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036009423434734344, + "timestamp": "2025-09-10 02:34:52.283813", + "step": 103, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:34:52.336329", + "step": 103, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017426682636141777, + "timestamp": "2025-09-10 02:34:52.346519", + "step": 104, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:34:52.398571", + "step": 104, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008325684815645218, + "timestamp": "2025-09-10 02:34:52.406394", + "step": 105, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:52.459271", + "step": 105, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017156464979052544, + "timestamp": "2025-09-10 02:34:52.461167", + "step": 106, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:52.513476", + "step": 106, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023122547194361687, + "timestamp": "2025-09-10 02:34:52.515666", + "step": 107, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:34:52.567779", + "step": 107, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029338719323277473, + "timestamp": "2025-09-10 02:34:52.573579", + "step": 108, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:52.625222", + "step": 108, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01162067148834467, + "timestamp": "2025-09-10 02:34:52.631557", + "step": 109, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:52.684166", + "step": 109, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009132458828389645, + "timestamp": "2025-09-10 02:34:52.686029", + "step": 110, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:34:52.739827", + "step": 110, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012252326123416424, + "timestamp": "2025-09-10 02:34:52.749420", + "step": 111, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:52.802490", + "step": 111, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00482892943546176, + "timestamp": "2025-09-10 02:34:52.808173", + "step": 112, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:52.859717", + "step": 112, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027315771207213402, + "timestamp": "2025-09-10 02:34:52.861550", + "step": 113, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:52.914055", + "step": 113, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027633002027869225, + "timestamp": "2025-09-10 02:34:52.916177", + "step": 114, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:52.968934", + "step": 114, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01256472896784544, + "timestamp": "2025-09-10 02:34:52.970926", + "step": 115, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:53.023268", + "step": 115, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022422954440116882, + "timestamp": "2025-09-10 02:34:53.028842", + "step": 116, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:34:53.084995", + "step": 116, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020762605592608452, + "timestamp": "2025-09-10 02:34:53.095971", + "step": 117, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:53.150162", + "step": 117, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03215666860342026, + "timestamp": "2025-09-10 02:34:53.153151", + "step": 118, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:34:53.208917", + "step": 118, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01391049288213253, + "timestamp": "2025-09-10 02:34:53.218477", + "step": 119, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:34:53.276156", + "step": 119, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021440556272864342, + "timestamp": "2025-09-10 02:34:53.287185", + "step": 120, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:34:53.339259", + "step": 120, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0282118059694767, + "timestamp": "2025-09-10 02:34:53.349203", + "step": 121, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:53.401702", + "step": 121, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009898746386170387, + "timestamp": "2025-09-10 02:34:53.407994", + "step": 122, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:53.460467", + "step": 122, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017764005810022354, + "timestamp": "2025-09-10 02:34:53.462505", + "step": 123, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:34:53.515071", + "step": 123, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013925476931035519, + "timestamp": "2025-09-10 02:34:53.523774", + "step": 124, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:34:53.576828", + "step": 124, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046622104942798615, + "timestamp": "2025-09-10 02:34:53.578762", + "step": 125, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:53.646326", + "step": 125, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016143329441547394, + "timestamp": "2025-09-10 02:34:53.648300", + "step": 126, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:34:53.710054", + "step": 126, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02423417940735817, + "timestamp": "2025-09-10 02:34:53.720946", + "step": 127, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:34:53.773868", + "step": 127, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014292905107140541, + "timestamp": "2025-09-10 02:34:53.779825", + "step": 128, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:34:53.833318", + "step": 128, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025560855865478516, + "timestamp": "2025-09-10 02:34:53.843616", + "step": 129, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:34:53.896737", + "step": 129, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029963672161102295, + "timestamp": "2025-09-10 02:34:53.898917", + "step": 130, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:53.953559", + "step": 130, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017245622351765633, + "timestamp": "2025-09-10 02:34:53.955367", + "step": 131, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:34:54.007814", + "step": 131, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013878666795790195, + "timestamp": "2025-09-10 02:34:54.013700", + "step": 132, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:54.066441", + "step": 132, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026786338537931442, + "timestamp": "2025-09-10 02:34:54.068475", + "step": 133, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:34:54.121367", + "step": 133, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014176661148667336, + "timestamp": "2025-09-10 02:34:54.124126", + "step": 134, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:34:54.177741", + "step": 134, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015191344544291496, + "timestamp": "2025-09-10 02:34:54.187152", + "step": 135, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:54.240469", + "step": 135, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01789441891014576, + "timestamp": "2025-09-10 02:34:54.246496", + "step": 136, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:34:54.298462", + "step": 136, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010462201200425625, + "timestamp": "2025-09-10 02:34:54.308481", + "step": 137, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:54.362454", + "step": 137, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012132969684898853, + "timestamp": "2025-09-10 02:34:54.364524", + "step": 138, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:34:54.417439", + "step": 138, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005434608552604914, + "timestamp": "2025-09-10 02:34:54.419568", + "step": 139, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:34:54.472660", + "step": 139, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017947377637028694, + "timestamp": "2025-09-10 02:34:54.478745", + "step": 140, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:34:54.537342", + "step": 140, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02349710650742054, + "timestamp": "2025-09-10 02:34:54.548743", + "step": 141, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:54.601777", + "step": 141, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010277139954268932, + "timestamp": "2025-09-10 02:34:54.603774", + "step": 142, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:34:54.656381", + "step": 142, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03177471458911896, + "timestamp": "2025-09-10 02:34:54.658539", + "step": 143, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:34:54.710999", + "step": 143, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027295321226119995, + "timestamp": "2025-09-10 02:34:54.716868", + "step": 144, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:54.769275", + "step": 144, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009652199223637581, + "timestamp": "2025-09-10 02:34:54.775651", + "step": 145, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:34:54.828783", + "step": 145, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03103617951273918, + "timestamp": "2025-09-10 02:34:54.830682", + "step": 146, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:34:54.883225", + "step": 146, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023361459374427795, + "timestamp": "2025-09-10 02:34:54.889589", + "step": 147, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:35:11.621207", + "step": 147, + "epoch": 1 + }, + { + "type": "pplx", + "content": 19670390.152276028, + "timestamp": "2025-09-10 02:35:11.624260", + "step": 147, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:11.678967", + "step": 147, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010303986258804798, + "timestamp": "2025-09-10 02:35:11.685027", + "step": 148, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:11.737709", + "step": 148, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009087040089070797, + "timestamp": "2025-09-10 02:35:11.739700", + "step": 149, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:11.793579", + "step": 149, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01798897422850132, + "timestamp": "2025-09-10 02:35:11.795789", + "step": 150, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:35:11.858128", + "step": 150, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03148674964904785, + "timestamp": "2025-09-10 02:35:11.869067", + "step": 151, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:11.922495", + "step": 151, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01954035647213459, + "timestamp": "2025-09-10 02:35:11.929800", + "step": 152, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:35:11.981637", + "step": 152, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03088425099849701, + "timestamp": "2025-09-10 02:35:11.983531", + "step": 153, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:12.035902", + "step": 153, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022318672388792038, + "timestamp": "2025-09-10 02:35:12.038107", + "step": 154, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:12.092568", + "step": 154, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00890205055475235, + "timestamp": "2025-09-10 02:35:12.102410", + "step": 155, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:12.155800", + "step": 155, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026110004633665085, + "timestamp": "2025-09-10 02:35:12.161784", + "step": 156, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:35:12.213673", + "step": 156, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027041813358664513, + "timestamp": "2025-09-10 02:35:12.215971", + "step": 157, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:12.269231", + "step": 157, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013711978681385517, + "timestamp": "2025-09-10 02:35:12.278827", + "step": 158, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:12.333247", + "step": 158, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01726366952061653, + "timestamp": "2025-09-10 02:35:12.343030", + "step": 159, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:12.396684", + "step": 159, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01840554177761078, + "timestamp": "2025-09-10 02:35:12.402696", + "step": 160, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:12.455600", + "step": 160, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03681541234254837, + "timestamp": "2025-09-10 02:35:12.457692", + "step": 161, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:12.511047", + "step": 161, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00946278590708971, + "timestamp": "2025-09-10 02:35:12.513105", + "step": 162, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:12.565948", + "step": 162, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03138422220945358, + "timestamp": "2025-09-10 02:35:12.568150", + "step": 163, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:12.620901", + "step": 163, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013762605376541615, + "timestamp": "2025-09-10 02:35:12.626911", + "step": 164, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:12.678627", + "step": 164, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0059081860817968845, + "timestamp": "2025-09-10 02:35:12.680461", + "step": 165, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:12.733022", + "step": 165, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018755516037344933, + "timestamp": "2025-09-10 02:35:12.739660", + "step": 166, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:12.793157", + "step": 166, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02642093226313591, + "timestamp": "2025-09-10 02:35:12.802772", + "step": 167, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:35:12.862505", + "step": 167, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013496960513293743, + "timestamp": "2025-09-10 02:35:12.874018", + "step": 168, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:35:12.930412", + "step": 168, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041730333119630814, + "timestamp": "2025-09-10 02:35:12.941648", + "step": 169, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:12.994841", + "step": 169, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0057114423252642155, + "timestamp": "2025-09-10 02:35:12.997263", + "step": 170, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:13.051186", + "step": 170, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026038197800517082, + "timestamp": "2025-09-10 02:35:13.053365", + "step": 171, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:13.107940", + "step": 171, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013238680548965931, + "timestamp": "2025-09-10 02:35:13.118516", + "step": 172, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:35:13.178000", + "step": 172, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014438546262681484, + "timestamp": "2025-09-10 02:35:13.189579", + "step": 173, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:13.242761", + "step": 173, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012119495309889317, + "timestamp": "2025-09-10 02:35:13.244698", + "step": 174, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:13.297684", + "step": 174, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021922742947936058, + "timestamp": "2025-09-10 02:35:13.299650", + "step": 175, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:13.353561", + "step": 175, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04477754235267639, + "timestamp": "2025-09-10 02:35:13.364128", + "step": 176, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:13.415938", + "step": 176, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010975569486618042, + "timestamp": "2025-09-10 02:35:13.418070", + "step": 177, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 12160073886080.0 + }, + "timestamp": "2025-09-10 02:35:13.507803", + "step": 177, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02194954827427864, + "timestamp": "2025-09-10 02:35:13.524962", + "step": 178, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:13.579344", + "step": 178, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02227923832833767, + "timestamp": "2025-09-10 02:35:13.589177", + "step": 179, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:13.641729", + "step": 179, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011062792502343655, + "timestamp": "2025-09-10 02:35:13.647609", + "step": 180, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:13.699971", + "step": 180, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030542535707354546, + "timestamp": "2025-09-10 02:35:13.702148", + "step": 181, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:35:13.762170", + "step": 181, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037561897188425064, + "timestamp": "2025-09-10 02:35:13.772894", + "step": 182, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:35:13.834136", + "step": 182, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021802807226777077, + "timestamp": "2025-09-10 02:35:13.845274", + "step": 183, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:13.898490", + "step": 183, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03886691480875015, + "timestamp": "2025-09-10 02:35:13.904351", + "step": 184, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:13.961312", + "step": 184, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025196580216288567, + "timestamp": "2025-09-10 02:35:13.963505", + "step": 185, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:14.016439", + "step": 185, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01903093047440052, + "timestamp": "2025-09-10 02:35:14.018578", + "step": 186, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:14.071012", + "step": 186, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0020099657122045755, + "timestamp": "2025-09-10 02:35:14.072908", + "step": 187, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:14.125180", + "step": 187, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01556878350675106, + "timestamp": "2025-09-10 02:35:14.132569", + "step": 188, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:14.184238", + "step": 188, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02499142661690712, + "timestamp": "2025-09-10 02:35:14.186226", + "step": 189, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:14.238459", + "step": 189, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024990113452076912, + "timestamp": "2025-09-10 02:35:14.241607", + "step": 190, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:14.293976", + "step": 190, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00737761938944459, + "timestamp": "2025-09-10 02:35:14.296285", + "step": 191, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:35:14.356894", + "step": 191, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015332886017858982, + "timestamp": "2025-09-10 02:35:14.368533", + "step": 192, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:14.420478", + "step": 192, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020425716415047646, + "timestamp": "2025-09-10 02:35:14.423438", + "step": 193, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:14.476022", + "step": 193, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017640886828303337, + "timestamp": "2025-09-10 02:35:14.478043", + "step": 194, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:14.530606", + "step": 194, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013645542785525322, + "timestamp": "2025-09-10 02:35:14.532726", + "step": 195, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:14.585391", + "step": 195, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023901494219899178, + "timestamp": "2025-09-10 02:35:14.591286", + "step": 196, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:14.643567", + "step": 196, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05236082524061203, + "timestamp": "2025-09-10 02:35:14.649894", + "step": 197, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:14.702833", + "step": 197, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0183752179145813, + "timestamp": "2025-09-10 02:35:14.704928", + "step": 198, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:14.757323", + "step": 198, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01529760006815195, + "timestamp": "2025-09-10 02:35:14.759418", + "step": 199, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:14.812770", + "step": 199, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018185382708907127, + "timestamp": "2025-09-10 02:35:14.821800", + "step": 200, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:14.874427", + "step": 200, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020868321880698204, + "timestamp": "2025-09-10 02:35:14.876542", + "step": 201, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:14.929175", + "step": 201, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008937294594943523, + "timestamp": "2025-09-10 02:35:14.931334", + "step": 202, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:14.984624", + "step": 202, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008630351163446903, + "timestamp": "2025-09-10 02:35:14.986893", + "step": 203, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:15.040527", + "step": 203, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020822303369641304, + "timestamp": "2025-09-10 02:35:15.046383", + "step": 204, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:15.098040", + "step": 204, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004343842621892691, + "timestamp": "2025-09-10 02:35:15.101110", + "step": 205, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:15.153775", + "step": 205, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0031640089582651854, + "timestamp": "2025-09-10 02:35:15.155840", + "step": 206, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:15.208497", + "step": 206, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01837976649403572, + "timestamp": "2025-09-10 02:35:15.210770", + "step": 207, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:15.264206", + "step": 207, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018046779558062553, + "timestamp": "2025-09-10 02:35:15.274523", + "step": 208, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:15.327275", + "step": 208, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0076715522445738316, + "timestamp": "2025-09-10 02:35:15.329971", + "step": 209, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:15.383139", + "step": 209, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0058698770590126514, + "timestamp": "2025-09-10 02:35:15.385333", + "step": 210, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:15.437789", + "step": 210, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03286213427782059, + "timestamp": "2025-09-10 02:35:15.440810", + "step": 211, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:15.493525", + "step": 211, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015056388452649117, + "timestamp": "2025-09-10 02:35:15.499408", + "step": 212, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:15.551083", + "step": 212, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005245543550699949, + "timestamp": "2025-09-10 02:35:15.553447", + "step": 213, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:15.606607", + "step": 213, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025814538821578026, + "timestamp": "2025-09-10 02:35:15.609007", + "step": 214, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:15.661618", + "step": 214, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01814667321741581, + "timestamp": "2025-09-10 02:35:15.664108", + "step": 215, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:15.716762", + "step": 215, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02777264080941677, + "timestamp": "2025-09-10 02:35:15.722633", + "step": 216, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:15.774598", + "step": 216, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0160059817135334, + "timestamp": "2025-09-10 02:35:15.776706", + "step": 217, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:35:15.834994", + "step": 217, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046521928161382675, + "timestamp": "2025-09-10 02:35:15.845367", + "step": 218, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:15.898771", + "step": 218, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008506479673087597, + "timestamp": "2025-09-10 02:35:15.901016", + "step": 219, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:15.955431", + "step": 219, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014576002024114132, + "timestamp": "2025-09-10 02:35:15.965969", + "step": 220, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:35:16.024658", + "step": 220, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004695187322795391, + "timestamp": "2025-09-10 02:35:16.036168", + "step": 221, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:16.089549", + "step": 221, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030469149351119995, + "timestamp": "2025-09-10 02:35:16.091624", + "step": 222, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:16.144707", + "step": 222, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00731209060177207, + "timestamp": "2025-09-10 02:35:16.146884", + "step": 223, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:16.199407", + "step": 223, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00895555317401886, + "timestamp": "2025-09-10 02:35:16.205364", + "step": 224, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:35:16.286947", + "step": 224, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014731078408658504, + "timestamp": "2025-09-10 02:35:16.303400", + "step": 225, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:16.357335", + "step": 225, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03776868060231209, + "timestamp": "2025-09-10 02:35:16.365322", + "step": 226, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:16.417941", + "step": 226, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006093989592045546, + "timestamp": "2025-09-10 02:35:16.425723", + "step": 227, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:16.478478", + "step": 227, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016133157536387444, + "timestamp": "2025-09-10 02:35:16.484402", + "step": 228, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:35:16.541287", + "step": 228, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025382960215210915, + "timestamp": "2025-09-10 02:35:16.552471", + "step": 229, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:16.607586", + "step": 229, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03177328780293465, + "timestamp": "2025-09-10 02:35:16.617371", + "step": 230, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:35:16.684221", + "step": 230, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005937655922025442, + "timestamp": "2025-09-10 02:35:16.696469", + "step": 231, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:16.749465", + "step": 231, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021884040907025337, + "timestamp": "2025-09-10 02:35:16.756417", + "step": 232, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:16.828347", + "step": 232, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033356085419654846, + "timestamp": "2025-09-10 02:35:16.838266", + "step": 233, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:16.897654", + "step": 233, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017480988055467606, + "timestamp": "2025-09-10 02:35:16.907205", + "step": 234, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:16.973785", + "step": 234, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04444620758295059, + "timestamp": "2025-09-10 02:35:16.976021", + "step": 235, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:17.029330", + "step": 235, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02271200343966484, + "timestamp": "2025-09-10 02:35:17.035477", + "step": 236, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:17.087888", + "step": 236, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02542167343199253, + "timestamp": "2025-09-10 02:35:17.090626", + "step": 237, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:17.143441", + "step": 237, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014857185073196888, + "timestamp": "2025-09-10 02:35:17.145527", + "step": 238, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:17.198064", + "step": 238, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008329623378813267, + "timestamp": "2025-09-10 02:35:17.200133", + "step": 239, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:35:17.268370", + "step": 239, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017592785879969597, + "timestamp": "2025-09-10 02:35:17.280828", + "step": 240, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:17.334901", + "step": 240, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006798648275434971, + "timestamp": "2025-09-10 02:35:17.337668", + "step": 241, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:17.389774", + "step": 241, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028966281563043594, + "timestamp": "2025-09-10 02:35:17.391883", + "step": 242, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:17.444531", + "step": 242, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027059515938162804, + "timestamp": "2025-09-10 02:35:17.447015", + "step": 243, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:35:17.499692", + "step": 243, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027755940333008766, + "timestamp": "2025-09-10 02:35:17.507136", + "step": 244, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:17.561771", + "step": 244, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03391401469707489, + "timestamp": "2025-09-10 02:35:17.563767", + "step": 245, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:17.616108", + "step": 245, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01781395450234413, + "timestamp": "2025-09-10 02:35:17.618339", + "step": 246, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:17.671172", + "step": 246, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012044159695506096, + "timestamp": "2025-09-10 02:35:17.674272", + "step": 247, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:35:17.742367", + "step": 247, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008590635843575, + "timestamp": "2025-09-10 02:35:17.755742", + "step": 248, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:17.808358", + "step": 248, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03630336374044418, + "timestamp": "2025-09-10 02:35:17.810532", + "step": 249, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:35:17.868550", + "step": 249, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009653487242758274, + "timestamp": "2025-09-10 02:35:17.879016", + "step": 250, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:17.932146", + "step": 250, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0035890305880457163, + "timestamp": "2025-09-10 02:35:17.934204", + "step": 251, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:17.988605", + "step": 251, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0024094083346426487, + "timestamp": "2025-09-10 02:35:17.999245", + "step": 252, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:18.051281", + "step": 252, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00296982005238533, + "timestamp": "2025-09-10 02:35:18.053541", + "step": 253, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:18.106193", + "step": 253, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02122844196856022, + "timestamp": "2025-09-10 02:35:18.111288", + "step": 254, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:18.163941", + "step": 254, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019904855638742447, + "timestamp": "2025-09-10 02:35:18.166123", + "step": 255, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:18.218451", + "step": 255, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006383887492120266, + "timestamp": "2025-09-10 02:35:18.224426", + "step": 256, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:18.276965", + "step": 256, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01735054701566696, + "timestamp": "2025-09-10 02:35:18.280039", + "step": 257, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:18.333173", + "step": 257, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010717264376580715, + "timestamp": "2025-09-10 02:35:18.335265", + "step": 258, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:18.388014", + "step": 258, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008735094219446182, + "timestamp": "2025-09-10 02:35:18.389998", + "step": 259, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:18.442698", + "step": 259, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02538483962416649, + "timestamp": "2025-09-10 02:35:18.448600", + "step": 260, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:35:18.505052", + "step": 260, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020319253206253052, + "timestamp": "2025-09-10 02:35:18.516210", + "step": 261, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:18.569134", + "step": 261, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05068264529109001, + "timestamp": "2025-09-10 02:35:18.571195", + "step": 262, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:35:18.624077", + "step": 262, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019700633361935616, + "timestamp": "2025-09-10 02:35:18.626304", + "step": 263, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:18.679409", + "step": 263, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03039342351257801, + "timestamp": "2025-09-10 02:35:18.688261", + "step": 264, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:18.740116", + "step": 264, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016535136848688126, + "timestamp": "2025-09-10 02:35:18.743100", + "step": 265, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:35:18.800715", + "step": 265, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04237798973917961, + "timestamp": "2025-09-10 02:35:18.811169", + "step": 266, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:18.864103", + "step": 266, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018209638074040413, + "timestamp": "2025-09-10 02:35:18.867165", + "step": 267, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:35:18.919117", + "step": 267, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015615333802998066, + "timestamp": "2025-09-10 02:35:18.924899", + "step": 268, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:18.976888", + "step": 268, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025077302008867264, + "timestamp": "2025-09-10 02:35:18.978983", + "step": 269, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:19.033182", + "step": 269, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007397750858217478, + "timestamp": "2025-09-10 02:35:19.042956", + "step": 270, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:19.096719", + "step": 270, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007934799417853355, + "timestamp": "2025-09-10 02:35:19.099061", + "step": 271, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:19.152208", + "step": 271, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029624303802847862, + "timestamp": "2025-09-10 02:35:19.158490", + "step": 272, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:19.211762", + "step": 272, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017695227637887, + "timestamp": "2025-09-10 02:35:19.213927", + "step": 273, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:19.266325", + "step": 273, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01380168367177248, + "timestamp": "2025-09-10 02:35:19.268474", + "step": 274, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:19.320542", + "step": 274, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007517028599977493, + "timestamp": "2025-09-10 02:35:19.322446", + "step": 275, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:19.375231", + "step": 275, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009313727729022503, + "timestamp": "2025-09-10 02:35:19.380909", + "step": 276, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:35:19.439432", + "step": 276, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02604990266263485, + "timestamp": "2025-09-10 02:35:19.451027", + "step": 277, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:19.503948", + "step": 277, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013054589740931988, + "timestamp": "2025-09-10 02:35:19.506033", + "step": 278, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:19.558328", + "step": 278, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011929529719054699, + "timestamp": "2025-09-10 02:35:19.560365", + "step": 279, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:19.612748", + "step": 279, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009011547081172466, + "timestamp": "2025-09-10 02:35:19.618610", + "step": 280, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:19.670507", + "step": 280, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02468419075012207, + "timestamp": "2025-09-10 02:35:19.680780", + "step": 281, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:35:19.733387", + "step": 281, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00777811324223876, + "timestamp": "2025-09-10 02:35:19.735557", + "step": 282, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:19.787907", + "step": 282, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030644703656435013, + "timestamp": "2025-09-10 02:35:19.790025", + "step": 283, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:19.842598", + "step": 283, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02055281400680542, + "timestamp": "2025-09-10 02:35:19.848176", + "step": 284, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:35:19.900113", + "step": 284, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02825690247118473, + "timestamp": "2025-09-10 02:35:19.902183", + "step": 285, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:19.954681", + "step": 285, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014599240384995937, + "timestamp": "2025-09-10 02:35:19.956946", + "step": 286, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:20.009750", + "step": 286, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009029184468090534, + "timestamp": "2025-09-10 02:35:20.012246", + "step": 287, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:20.069617", + "step": 287, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009579007513821125, + "timestamp": "2025-09-10 02:35:20.088602", + "step": 288, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:20.149928", + "step": 288, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028099318966269493, + "timestamp": "2025-09-10 02:35:20.156004", + "step": 289, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:20.222737", + "step": 289, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0020475992932915688, + "timestamp": "2025-09-10 02:35:20.225451", + "step": 290, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:20.287218", + "step": 290, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009952199645340443, + "timestamp": "2025-09-10 02:35:20.296766", + "step": 291, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:20.359226", + "step": 291, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0032330357935279608, + "timestamp": "2025-09-10 02:35:20.366041", + "step": 292, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:20.434462", + "step": 292, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01548854261636734, + "timestamp": "2025-09-10 02:35:20.440080", + "step": 293, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:20.510435", + "step": 293, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012301183305680752, + "timestamp": "2025-09-10 02:35:20.512982", + "step": 294, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:35:37.283257", + "step": 294, + "epoch": 1 + }, + { + "type": "pplx", + "content": 18750520.76587751, + "timestamp": "2025-09-10 02:35:37.286063", + "step": 294, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:35:37.345243", + "step": 294, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018759505823254585, + "timestamp": "2025-09-10 02:35:37.355905", + "step": 295, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:35:37.417674", + "step": 295, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015110853128135204, + "timestamp": "2025-09-10 02:35:37.429566", + "step": 296, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:37.482442", + "step": 296, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014034484513103962, + "timestamp": "2025-09-10 02:35:37.484570", + "step": 297, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:37.537534", + "step": 297, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008410094305872917, + "timestamp": "2025-09-10 02:35:37.539580", + "step": 298, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:35:37.605684", + "step": 298, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012407462112605572, + "timestamp": "2025-09-10 02:35:37.617909", + "step": 299, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:35:37.670483", + "step": 299, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03873913362622261, + "timestamp": "2025-09-10 02:35:37.676484", + "step": 300, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:37.728626", + "step": 300, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013834113255143166, + "timestamp": "2025-09-10 02:35:37.730541", + "step": 301, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:37.783932", + "step": 301, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03874850645661354, + "timestamp": "2025-09-10 02:35:37.789928", + "step": 302, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:37.844381", + "step": 302, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04323044791817665, + "timestamp": "2025-09-10 02:35:37.849821", + "step": 303, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:37.904337", + "step": 303, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01321121584624052, + "timestamp": "2025-09-10 02:35:37.910701", + "step": 304, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:37.964091", + "step": 304, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01439062599092722, + "timestamp": "2025-09-10 02:35:37.969949", + "step": 305, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:38.023377", + "step": 305, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01884712092578411, + "timestamp": "2025-09-10 02:35:38.030853", + "step": 306, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:38.085130", + "step": 306, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01008074451237917, + "timestamp": "2025-09-10 02:35:38.094951", + "step": 307, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:38.148115", + "step": 307, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01976635865867138, + "timestamp": "2025-09-10 02:35:38.154101", + "step": 308, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:38.206363", + "step": 308, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015738816931843758, + "timestamp": "2025-09-10 02:35:38.208593", + "step": 309, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:38.261153", + "step": 309, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006744579412043095, + "timestamp": "2025-09-10 02:35:38.263257", + "step": 310, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:38.315786", + "step": 310, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008818583562970161, + "timestamp": "2025-09-10 02:35:38.318142", + "step": 311, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:35:38.379558", + "step": 311, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008585556410253048, + "timestamp": "2025-09-10 02:35:38.391424", + "step": 312, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:35:38.450215", + "step": 312, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022689757868647575, + "timestamp": "2025-09-10 02:35:38.461806", + "step": 313, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:38.514991", + "step": 313, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018085205927491188, + "timestamp": "2025-09-10 02:35:38.524618", + "step": 314, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:38.578333", + "step": 314, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025187838822603226, + "timestamp": "2025-09-10 02:35:38.580440", + "step": 315, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:38.643387", + "step": 315, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021669624373316765, + "timestamp": "2025-09-10 02:35:38.649529", + "step": 316, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:38.704464", + "step": 316, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018435750156641006, + "timestamp": "2025-09-10 02:35:38.706739", + "step": 317, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:38.760944", + "step": 317, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025637488812208176, + "timestamp": "2025-09-10 02:35:38.767968", + "step": 318, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:38.821051", + "step": 318, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017072511836886406, + "timestamp": "2025-09-10 02:35:38.823215", + "step": 319, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:38.876069", + "step": 319, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01036195270717144, + "timestamp": "2025-09-10 02:35:38.883407", + "step": 320, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:35:38.948276", + "step": 320, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012251333333551884, + "timestamp": "2025-09-10 02:35:38.961481", + "step": 321, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:39.013802", + "step": 321, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013256451115012169, + "timestamp": "2025-09-10 02:35:39.016809", + "step": 322, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:39.071579", + "step": 322, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017366288229823112, + "timestamp": "2025-09-10 02:35:39.081373", + "step": 323, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:39.134874", + "step": 323, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019963664934039116, + "timestamp": "2025-09-10 02:35:39.141251", + "step": 324, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:39.193938", + "step": 324, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029277343302965164, + "timestamp": "2025-09-10 02:35:39.196312", + "step": 325, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:39.249302", + "step": 325, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006462120451033115, + "timestamp": "2025-09-10 02:35:39.251365", + "step": 326, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:39.304623", + "step": 326, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021544145420193672, + "timestamp": "2025-09-10 02:35:39.306956", + "step": 327, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:39.359398", + "step": 327, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024486379697918892, + "timestamp": "2025-09-10 02:35:39.366690", + "step": 328, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:39.418967", + "step": 328, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026301609352231026, + "timestamp": "2025-09-10 02:35:39.420887", + "step": 329, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:39.473380", + "step": 329, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028725922107696533, + "timestamp": "2025-09-10 02:35:39.475376", + "step": 330, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:39.528704", + "step": 330, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023373592644929886, + "timestamp": "2025-09-10 02:35:39.530647", + "step": 331, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:39.584100", + "step": 331, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028214115649461746, + "timestamp": "2025-09-10 02:35:39.590856", + "step": 332, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:39.643890", + "step": 332, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03408941254019737, + "timestamp": "2025-09-10 02:35:39.646652", + "step": 333, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:39.699175", + "step": 333, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013487817719578743, + "timestamp": "2025-09-10 02:35:39.701330", + "step": 334, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:39.754014", + "step": 334, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02832706831395626, + "timestamp": "2025-09-10 02:35:39.756016", + "step": 335, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:39.808984", + "step": 335, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0026079160161316395, + "timestamp": "2025-09-10 02:35:39.815386", + "step": 336, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:39.869596", + "step": 336, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008859396912157536, + "timestamp": "2025-09-10 02:35:39.871804", + "step": 337, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:39.926530", + "step": 337, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00938789639621973, + "timestamp": "2025-09-10 02:35:39.928794", + "step": 338, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:39.981900", + "step": 338, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006473448593169451, + "timestamp": "2025-09-10 02:35:39.984056", + "step": 339, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:40.037055", + "step": 339, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020039811730384827, + "timestamp": "2025-09-10 02:35:40.043350", + "step": 340, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:40.095703", + "step": 340, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02735760621726513, + "timestamp": "2025-09-10 02:35:40.101820", + "step": 341, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:35:40.155176", + "step": 341, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01631229557096958, + "timestamp": "2025-09-10 02:35:40.157361", + "step": 342, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:40.210890", + "step": 342, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003340591909363866, + "timestamp": "2025-09-10 02:35:40.213161", + "step": 343, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:40.266218", + "step": 343, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012631870806217194, + "timestamp": "2025-09-10 02:35:40.272465", + "step": 344, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:40.324789", + "step": 344, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013385455124080181, + "timestamp": "2025-09-10 02:35:40.334446", + "step": 345, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:40.387958", + "step": 345, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024300584569573402, + "timestamp": "2025-09-10 02:35:40.390000", + "step": 346, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:40.444166", + "step": 346, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011237295344471931, + "timestamp": "2025-09-10 02:35:40.450478", + "step": 347, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:40.505473", + "step": 347, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01617843471467495, + "timestamp": "2025-09-10 02:35:40.511792", + "step": 348, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:40.565897", + "step": 348, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023241808637976646, + "timestamp": "2025-09-10 02:35:40.567873", + "step": 349, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:40.620972", + "step": 349, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010824748314917088, + "timestamp": "2025-09-10 02:35:40.623080", + "step": 350, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:40.677188", + "step": 350, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02128290943801403, + "timestamp": "2025-09-10 02:35:40.684603", + "step": 351, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:40.737193", + "step": 351, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003326027188450098, + "timestamp": "2025-09-10 02:35:40.743257", + "step": 352, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:40.795034", + "step": 352, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022350847721099854, + "timestamp": "2025-09-10 02:35:40.796829", + "step": 353, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:40.848795", + "step": 353, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01989401876926422, + "timestamp": "2025-09-10 02:35:40.851724", + "step": 354, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:40.904797", + "step": 354, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008281980641186237, + "timestamp": "2025-09-10 02:35:40.914432", + "step": 355, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:35:40.973777", + "step": 355, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012412874028086662, + "timestamp": "2025-09-10 02:35:40.985021", + "step": 356, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:41.043620", + "step": 356, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004053934942930937, + "timestamp": "2025-09-10 02:35:41.051651", + "step": 357, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 13120079713856.0 + }, + "timestamp": "2025-09-10 02:35:41.151175", + "step": 357, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039582908153533936, + "timestamp": "2025-09-10 02:35:41.169816", + "step": 358, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:41.222766", + "step": 358, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004859428387135267, + "timestamp": "2025-09-10 02:35:41.224646", + "step": 359, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:41.299104", + "step": 359, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007610120810568333, + "timestamp": "2025-09-10 02:35:41.310546", + "step": 360, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:41.364004", + "step": 360, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004768159706145525, + "timestamp": "2025-09-10 02:35:41.366068", + "step": 361, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:41.418920", + "step": 361, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018082713708281517, + "timestamp": "2025-09-10 02:35:41.421175", + "step": 362, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:41.474265", + "step": 362, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021560117602348328, + "timestamp": "2025-09-10 02:35:41.476372", + "step": 363, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:41.530469", + "step": 363, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006435369607061148, + "timestamp": "2025-09-10 02:35:41.541155", + "step": 364, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:41.596482", + "step": 364, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015123516321182251, + "timestamp": "2025-09-10 02:35:41.598288", + "step": 365, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:41.650845", + "step": 365, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005298522301018238, + "timestamp": "2025-09-10 02:35:41.653846", + "step": 366, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:41.706285", + "step": 366, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012734637595713139, + "timestamp": "2025-09-10 02:35:41.708311", + "step": 367, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:41.761064", + "step": 367, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021002424880862236, + "timestamp": "2025-09-10 02:35:41.766854", + "step": 368, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:41.818974", + "step": 368, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016581466421484947, + "timestamp": "2025-09-10 02:35:41.820998", + "step": 369, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:41.873811", + "step": 369, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038347463123500347, + "timestamp": "2025-09-10 02:35:41.875842", + "step": 370, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:41.929116", + "step": 370, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0073821512050926685, + "timestamp": "2025-09-10 02:35:41.931457", + "step": 371, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:41.984624", + "step": 371, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008205964230000973, + "timestamp": "2025-09-10 02:35:41.993341", + "step": 372, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:42.045445", + "step": 372, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0025237463414669037, + "timestamp": "2025-09-10 02:35:42.047508", + "step": 373, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:42.100503", + "step": 373, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040251873433589935, + "timestamp": "2025-09-10 02:35:42.102448", + "step": 374, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:42.155237", + "step": 374, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004792653955519199, + "timestamp": "2025-09-10 02:35:42.157380", + "step": 375, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:42.211929", + "step": 375, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02295234613120556, + "timestamp": "2025-09-10 02:35:42.222528", + "step": 376, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:35:42.295572", + "step": 376, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015930820256471634, + "timestamp": "2025-09-10 02:35:42.310942", + "step": 377, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:42.363712", + "step": 377, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034202445298433304, + "timestamp": "2025-09-10 02:35:42.365655", + "step": 378, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:42.417788", + "step": 378, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03493393212556839, + "timestamp": "2025-09-10 02:35:42.419694", + "step": 379, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:42.472219", + "step": 379, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020121442154049873, + "timestamp": "2025-09-10 02:35:42.477945", + "step": 380, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:42.530060", + "step": 380, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007878902368247509, + "timestamp": "2025-09-10 02:35:42.532168", + "step": 381, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:42.584357", + "step": 381, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0033871701452881098, + "timestamp": "2025-09-10 02:35:42.586494", + "step": 382, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:42.639044", + "step": 382, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012487685307860374, + "timestamp": "2025-09-10 02:35:42.641094", + "step": 383, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:42.693622", + "step": 383, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007192258723080158, + "timestamp": "2025-09-10 02:35:42.701197", + "step": 384, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:35:42.757458", + "step": 384, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02217181958258152, + "timestamp": "2025-09-10 02:35:42.768815", + "step": 385, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:42.821616", + "step": 385, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03604666516184807, + "timestamp": "2025-09-10 02:35:42.823513", + "step": 386, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:42.877126", + "step": 386, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008639396168291569, + "timestamp": "2025-09-10 02:35:42.879176", + "step": 387, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:35:42.939533", + "step": 387, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016208883374929428, + "timestamp": "2025-09-10 02:35:42.951008", + "step": 388, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:43.003996", + "step": 388, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012816510163247585, + "timestamp": "2025-09-10 02:35:43.006275", + "step": 389, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:43.059689", + "step": 389, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013577081263065338, + "timestamp": "2025-09-10 02:35:43.061776", + "step": 390, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:43.114527", + "step": 390, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03696885332465172, + "timestamp": "2025-09-10 02:35:43.122891", + "step": 391, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:43.175727", + "step": 391, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026915784925222397, + "timestamp": "2025-09-10 02:35:43.181231", + "step": 392, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:43.233258", + "step": 392, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02463557943701744, + "timestamp": "2025-09-10 02:35:43.239965", + "step": 393, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:43.292812", + "step": 393, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020221231505274773, + "timestamp": "2025-09-10 02:35:43.299090", + "step": 394, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:43.352087", + "step": 394, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013852830044925213, + "timestamp": "2025-09-10 02:35:43.354280", + "step": 395, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:43.408539", + "step": 395, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01046758983284235, + "timestamp": "2025-09-10 02:35:43.419132", + "step": 396, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:43.471354", + "step": 396, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007396661676466465, + "timestamp": "2025-09-10 02:35:43.473435", + "step": 397, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:43.526422", + "step": 397, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0032712691463530064, + "timestamp": "2025-09-10 02:35:43.528379", + "step": 398, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:43.581736", + "step": 398, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04277123883366585, + "timestamp": "2025-09-10 02:35:43.583408", + "step": 399, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:43.636410", + "step": 399, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016597582027316093, + "timestamp": "2025-09-10 02:35:43.642256", + "step": 400, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:43.694545", + "step": 400, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027218716219067574, + "timestamp": "2025-09-10 02:35:43.696268", + "step": 401, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:43.748806", + "step": 401, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0064104353077709675, + "timestamp": "2025-09-10 02:35:43.754927", + "step": 402, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:43.809487", + "step": 402, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016035081818699837, + "timestamp": "2025-09-10 02:35:43.819312", + "step": 403, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:43.871950", + "step": 403, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005248712841421366, + "timestamp": "2025-09-10 02:35:43.877919", + "step": 404, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:43.929769", + "step": 404, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019017385318875313, + "timestamp": "2025-09-10 02:35:43.931887", + "step": 405, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:43.984046", + "step": 405, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02356548048555851, + "timestamp": "2025-09-10 02:35:43.985842", + "step": 406, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:44.038349", + "step": 406, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014385750517249107, + "timestamp": "2025-09-10 02:35:44.040169", + "step": 407, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:44.092877", + "step": 407, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008769115433096886, + "timestamp": "2025-09-10 02:35:44.103237", + "step": 408, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:35:44.156294", + "step": 408, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016957219690084457, + "timestamp": "2025-09-10 02:35:44.166797", + "step": 409, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:44.219772", + "step": 409, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025768084451556206, + "timestamp": "2025-09-10 02:35:44.221903", + "step": 410, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:44.274877", + "step": 410, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026145832613110542, + "timestamp": "2025-09-10 02:35:44.276915", + "step": 411, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:44.329781", + "step": 411, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004151365719735622, + "timestamp": "2025-09-10 02:35:44.335581", + "step": 412, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:35:44.387685", + "step": 412, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02256178855895996, + "timestamp": "2025-09-10 02:35:44.389478", + "step": 413, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:44.441766", + "step": 413, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036476414650678635, + "timestamp": "2025-09-10 02:35:44.443503", + "step": 414, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:44.495839", + "step": 414, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022130006924271584, + "timestamp": "2025-09-10 02:35:44.498068", + "step": 415, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:44.551011", + "step": 415, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004478184040635824, + "timestamp": "2025-09-10 02:35:44.558203", + "step": 416, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:44.610489", + "step": 416, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02027822472155094, + "timestamp": "2025-09-10 02:35:44.617030", + "step": 417, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:44.670401", + "step": 417, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014351091347634792, + "timestamp": "2025-09-10 02:35:44.672316", + "step": 418, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:44.725370", + "step": 418, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02787570282816887, + "timestamp": "2025-09-10 02:35:44.731775", + "step": 419, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:44.784483", + "step": 419, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04726407676935196, + "timestamp": "2025-09-10 02:35:44.791733", + "step": 420, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:44.845738", + "step": 420, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04083619639277458, + "timestamp": "2025-09-10 02:35:44.853656", + "step": 421, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:44.906705", + "step": 421, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030161907896399498, + "timestamp": "2025-09-10 02:35:44.914478", + "step": 422, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:44.966801", + "step": 422, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01481288019567728, + "timestamp": "2025-09-10 02:35:44.973110", + "step": 423, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:45.026070", + "step": 423, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021902069449424744, + "timestamp": "2025-09-10 02:35:45.032079", + "step": 424, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:45.084546", + "step": 424, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007905379869043827, + "timestamp": "2025-09-10 02:35:45.086442", + "step": 425, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:45.138902", + "step": 425, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010098838247358799, + "timestamp": "2025-09-10 02:35:45.140873", + "step": 426, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:35:45.193897", + "step": 426, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018967723473906517, + "timestamp": "2025-09-10 02:35:45.195895", + "step": 427, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:45.248495", + "step": 427, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01284452062100172, + "timestamp": "2025-09-10 02:35:45.255811", + "step": 428, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:35:45.308398", + "step": 428, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004694396164268255, + "timestamp": "2025-09-10 02:35:45.316389", + "step": 429, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:35:45.370039", + "step": 429, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007329125888645649, + "timestamp": "2025-09-10 02:35:45.376338", + "step": 430, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:45.429364", + "step": 430, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015766726806759834, + "timestamp": "2025-09-10 02:35:45.438932", + "step": 431, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:35:45.506016", + "step": 431, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015795400366187096, + "timestamp": "2025-09-10 02:35:45.511888", + "step": 432, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:45.564028", + "step": 432, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033254317939281464, + "timestamp": "2025-09-10 02:35:45.566005", + "step": 433, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:35:45.618375", + "step": 433, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03296815976500511, + "timestamp": "2025-09-10 02:35:45.620487", + "step": 434, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:45.673569", + "step": 434, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017036251723766327, + "timestamp": "2025-09-10 02:35:45.675552", + "step": 435, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:45.728315", + "step": 435, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008534570224583149, + "timestamp": "2025-09-10 02:35:45.734456", + "step": 436, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:35:45.786602", + "step": 436, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009411263279616833, + "timestamp": "2025-09-10 02:35:45.796414", + "step": 437, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:35:45.854581", + "step": 437, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04341122880578041, + "timestamp": "2025-09-10 02:35:45.865025", + "step": 438, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:35:45.918936", + "step": 438, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012458802200853825, + "timestamp": "2025-09-10 02:35:45.921157", + "step": 439, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:35:45.973888", + "step": 439, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011517300270497799, + "timestamp": "2025-09-10 02:35:45.979872", + "step": 440, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:35:46.032914", + "step": 440, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00763851311057806, + "timestamp": "2025-09-10 02:35:46.034840", + "step": 441, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:36:02.719566", + "step": 441, + "epoch": 1 + }, + { + "type": "pplx", + "content": 20480146.857568886, + "timestamp": "2025-09-10 02:36:02.722030", + "step": 441, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:02.776087", + "step": 441, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0124233802780509, + "timestamp": "2025-09-10 02:36:02.778231", + "step": 442, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:02.831082", + "step": 442, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020152656361460686, + "timestamp": "2025-09-10 02:36:02.833300", + "step": 443, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:02.885571", + "step": 443, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0173350777477026, + "timestamp": "2025-09-10 02:36:02.892717", + "step": 444, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:36:02.952006", + "step": 444, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02373490296304226, + "timestamp": "2025-09-10 02:36:02.964018", + "step": 445, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:03.017730", + "step": 445, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02507105842232704, + "timestamp": "2025-09-10 02:36:03.021468", + "step": 446, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:03.078069", + "step": 446, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019890591502189636, + "timestamp": "2025-09-10 02:36:03.080272", + "step": 447, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:03.134733", + "step": 447, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00882682390511036, + "timestamp": "2025-09-10 02:36:03.145347", + "step": 448, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:03.197438", + "step": 448, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022805573418736458, + "timestamp": "2025-09-10 02:36:03.205430", + "step": 449, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:03.267262", + "step": 449, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014748402871191502, + "timestamp": "2025-09-10 02:36:03.271155", + "step": 450, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:03.323659", + "step": 450, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015324982814490795, + "timestamp": "2025-09-10 02:36:03.326284", + "step": 451, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:03.378495", + "step": 451, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016105040907859802, + "timestamp": "2025-09-10 02:36:03.385599", + "step": 452, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:03.440120", + "step": 452, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016740145161747932, + "timestamp": "2025-09-10 02:36:03.442065", + "step": 453, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:03.495803", + "step": 453, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01827307976782322, + "timestamp": "2025-09-10 02:36:03.505530", + "step": 454, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:03.558128", + "step": 454, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016006184741854668, + "timestamp": "2025-09-10 02:36:03.564319", + "step": 455, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:03.617538", + "step": 455, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004057266749441624, + "timestamp": "2025-09-10 02:36:03.626360", + "step": 456, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:03.677643", + "step": 456, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011827317997813225, + "timestamp": "2025-09-10 02:36:03.680575", + "step": 457, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:03.732486", + "step": 457, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011614165268838406, + "timestamp": "2025-09-10 02:36:03.734665", + "step": 458, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:03.786231", + "step": 458, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00911747757345438, + "timestamp": "2025-09-10 02:36:03.789384", + "step": 459, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:03.841629", + "step": 459, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013866490684449673, + "timestamp": "2025-09-10 02:36:03.847857", + "step": 460, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:03.899443", + "step": 460, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01937706209719181, + "timestamp": "2025-09-10 02:36:03.901484", + "step": 461, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:03.952993", + "step": 461, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011243008077144623, + "timestamp": "2025-09-10 02:36:03.955623", + "step": 462, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:36:04.013976", + "step": 462, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02434748224914074, + "timestamp": "2025-09-10 02:36:04.024434", + "step": 463, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:04.077714", + "step": 463, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01768920198082924, + "timestamp": "2025-09-10 02:36:04.088136", + "step": 464, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:04.143076", + "step": 464, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02644895575940609, + "timestamp": "2025-09-10 02:36:04.145451", + "step": 465, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:04.197655", + "step": 465, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005339294206351042, + "timestamp": "2025-09-10 02:36:04.199661", + "step": 466, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:04.251792", + "step": 466, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017220327630639076, + "timestamp": "2025-09-10 02:36:04.254883", + "step": 467, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:04.308157", + "step": 467, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01685364916920662, + "timestamp": "2025-09-10 02:36:04.316948", + "step": 468, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:04.368169", + "step": 468, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023372534662485123, + "timestamp": "2025-09-10 02:36:04.374949", + "step": 469, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:04.427238", + "step": 469, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029475973919034004, + "timestamp": "2025-09-10 02:36:04.429899", + "step": 470, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:04.500798", + "step": 470, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02342565916478634, + "timestamp": "2025-09-10 02:36:04.507060", + "step": 471, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:36:04.559079", + "step": 471, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008689814247190952, + "timestamp": "2025-09-10 02:36:04.565437", + "step": 472, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:04.617227", + "step": 472, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01086872536689043, + "timestamp": "2025-09-10 02:36:04.619020", + "step": 473, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:04.670888", + "step": 473, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02115866169333458, + "timestamp": "2025-09-10 02:36:04.672919", + "step": 474, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:04.725188", + "step": 474, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038662366569042206, + "timestamp": "2025-09-10 02:36:04.727837", + "step": 475, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:04.779597", + "step": 475, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018250806257128716, + "timestamp": "2025-09-10 02:36:04.785286", + "step": 476, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:04.839611", + "step": 476, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014851778745651245, + "timestamp": "2025-09-10 02:36:04.841881", + "step": 477, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:04.895584", + "step": 477, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02507244236767292, + "timestamp": "2025-09-10 02:36:04.897693", + "step": 478, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:04.949656", + "step": 478, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0236313845962286, + "timestamp": "2025-09-10 02:36:04.956305", + "step": 479, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:05.018405", + "step": 479, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006937531288713217, + "timestamp": "2025-09-10 02:36:05.024149", + "step": 480, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:05.081336", + "step": 480, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011437739245593548, + "timestamp": "2025-09-10 02:36:05.084002", + "step": 481, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:05.136809", + "step": 481, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01146687287837267, + "timestamp": "2025-09-10 02:36:05.140067", + "step": 482, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:05.192268", + "step": 482, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013129880651831627, + "timestamp": "2025-09-10 02:36:05.194176", + "step": 483, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:05.246171", + "step": 483, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009299784898757935, + "timestamp": "2025-09-10 02:36:05.253744", + "step": 484, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:05.305887", + "step": 484, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028207819908857346, + "timestamp": "2025-09-10 02:36:05.308184", + "step": 485, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:36:05.374709", + "step": 485, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026876013725996017, + "timestamp": "2025-09-10 02:36:05.386948", + "step": 486, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:05.439203", + "step": 486, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005303249694406986, + "timestamp": "2025-09-10 02:36:05.441335", + "step": 487, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:05.492976", + "step": 487, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034863557666540146, + "timestamp": "2025-09-10 02:36:05.498475", + "step": 488, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:05.549879", + "step": 488, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02458783984184265, + "timestamp": "2025-09-10 02:36:05.552952", + "step": 489, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:36:05.624990", + "step": 489, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024885956197977066, + "timestamp": "2025-09-10 02:36:05.637706", + "step": 490, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:05.690236", + "step": 490, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010751993395388126, + "timestamp": "2025-09-10 02:36:05.693442", + "step": 491, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:36:05.753504", + "step": 491, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016897115856409073, + "timestamp": "2025-09-10 02:36:05.764999", + "step": 492, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:05.817078", + "step": 492, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010212777182459831, + "timestamp": "2025-09-10 02:36:05.819738", + "step": 493, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:05.871976", + "step": 493, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016341043636202812, + "timestamp": "2025-09-10 02:36:05.874186", + "step": 494, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:05.926778", + "step": 494, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008837589994072914, + "timestamp": "2025-09-10 02:36:05.928859", + "step": 495, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:36:05.980904", + "step": 495, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014222336933016777, + "timestamp": "2025-09-10 02:36:05.986576", + "step": 496, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:36:06.038893", + "step": 496, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009091884829103947, + "timestamp": "2025-09-10 02:36:06.040871", + "step": 497, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:36:06.108021", + "step": 497, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010018201544880867, + "timestamp": "2025-09-10 02:36:06.120621", + "step": 498, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:36:06.172366", + "step": 498, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008286223746836185, + "timestamp": "2025-09-10 02:36:06.174904", + "step": 499, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:06.227178", + "step": 499, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01953982003033161, + "timestamp": "2025-09-10 02:36:06.234408", + "step": 500, + "epoch": 1 + }, + { + "type": "info", + "content": "Checkpoint saved at step 500", + "timestamp": "2025-09-10 02:36:06.661874", + "step": 500, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:06.716314", + "step": 500, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00831193383783102, + "timestamp": "2025-09-10 02:36:06.718332", + "step": 501, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:06.772160", + "step": 501, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00853397324681282, + "timestamp": "2025-09-10 02:36:06.778052", + "step": 502, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:06.831020", + "step": 502, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031167268753051758, + "timestamp": "2025-09-10 02:36:06.833561", + "step": 503, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:06.886327", + "step": 503, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016558272764086723, + "timestamp": "2025-09-10 02:36:06.893870", + "step": 504, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:36:06.952637", + "step": 504, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018046345561742783, + "timestamp": "2025-09-10 02:36:06.964395", + "step": 505, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:07.017104", + "step": 505, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029076406732201576, + "timestamp": "2025-09-10 02:36:07.019594", + "step": 506, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:07.071474", + "step": 506, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034537460654973984, + "timestamp": "2025-09-10 02:36:07.074495", + "step": 507, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:07.127200", + "step": 507, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00975030567497015, + "timestamp": "2025-09-10 02:36:07.133154", + "step": 508, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:07.184883", + "step": 508, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014805578626692295, + "timestamp": "2025-09-10 02:36:07.194905", + "step": 509, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:07.249070", + "step": 509, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014143081381917, + "timestamp": "2025-09-10 02:36:07.258856", + "step": 510, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:36:07.320233", + "step": 510, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010913644917309284, + "timestamp": "2025-09-10 02:36:07.331138", + "step": 511, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:07.384533", + "step": 511, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029624953866004944, + "timestamp": "2025-09-10 02:36:07.391750", + "step": 512, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:07.445189", + "step": 512, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029218172654509544, + "timestamp": "2025-09-10 02:36:07.448118", + "step": 513, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:36:07.515811", + "step": 513, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03561558574438095, + "timestamp": "2025-09-10 02:36:07.528394", + "step": 514, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:07.581779", + "step": 514, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013204547576606274, + "timestamp": "2025-09-10 02:36:07.583732", + "step": 515, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:36:07.644002", + "step": 515, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01040361262857914, + "timestamp": "2025-09-10 02:36:07.655535", + "step": 516, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:36:07.711951", + "step": 516, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033751796931028366, + "timestamp": "2025-09-10 02:36:07.723201", + "step": 517, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:07.776678", + "step": 517, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011231355369091034, + "timestamp": "2025-09-10 02:36:07.778759", + "step": 518, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:07.831714", + "step": 518, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02144441194832325, + "timestamp": "2025-09-10 02:36:07.839969", + "step": 519, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:07.896218", + "step": 519, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04766199365258217, + "timestamp": "2025-09-10 02:36:07.902017", + "step": 520, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:07.954486", + "step": 520, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012711511924862862, + "timestamp": "2025-09-10 02:36:07.956778", + "step": 521, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:08.010041", + "step": 521, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008068517781794071, + "timestamp": "2025-09-10 02:36:08.011975", + "step": 522, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:08.065230", + "step": 522, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022473914548754692, + "timestamp": "2025-09-10 02:36:08.074775", + "step": 523, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:08.128131", + "step": 523, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010958467610180378, + "timestamp": "2025-09-10 02:36:08.133655", + "step": 524, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:36:08.185366", + "step": 524, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024017466232180595, + "timestamp": "2025-09-10 02:36:08.187158", + "step": 525, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:08.239100", + "step": 525, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024791982024908066, + "timestamp": "2025-09-10 02:36:08.242352", + "step": 526, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:08.294833", + "step": 526, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016966592520475388, + "timestamp": "2025-09-10 02:36:08.296917", + "step": 527, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:08.349888", + "step": 527, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02015499584376812, + "timestamp": "2025-09-10 02:36:08.357326", + "step": 528, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:08.411110", + "step": 528, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006627053488045931, + "timestamp": "2025-09-10 02:36:08.413499", + "step": 529, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:08.465237", + "step": 529, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02907853201031685, + "timestamp": "2025-09-10 02:36:08.468039", + "step": 530, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:08.524650", + "step": 530, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019893009215593338, + "timestamp": "2025-09-10 02:36:08.530405", + "step": 531, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:08.583041", + "step": 531, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020581111311912537, + "timestamp": "2025-09-10 02:36:08.588893", + "step": 532, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:36:08.660550", + "step": 532, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011427761986851692, + "timestamp": "2025-09-10 02:36:08.675465", + "step": 533, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:08.727804", + "step": 533, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03970498591661453, + "timestamp": "2025-09-10 02:36:08.729826", + "step": 534, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:08.782080", + "step": 534, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016118813306093216, + "timestamp": "2025-09-10 02:36:08.784035", + "step": 535, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 688 + ], + "flops": 13760083599040.0 + }, + "timestamp": "2025-09-10 02:36:08.885129", + "step": 535, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011583237908780575, + "timestamp": "2025-09-10 02:36:08.905030", + "step": 536, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:36:08.957150", + "step": 536, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009176967665553093, + "timestamp": "2025-09-10 02:36:08.959175", + "step": 537, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:09.011584", + "step": 537, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02078993245959282, + "timestamp": "2025-09-10 02:36:09.014506", + "step": 538, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:09.067594", + "step": 538, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016172481700778008, + "timestamp": "2025-09-10 02:36:09.069633", + "step": 539, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:36:09.122192", + "step": 539, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01667824201285839, + "timestamp": "2025-09-10 02:36:09.128051", + "step": 540, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:09.181493", + "step": 540, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013945282436907291, + "timestamp": "2025-09-10 02:36:09.191113", + "step": 541, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:09.244341", + "step": 541, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007478418760001659, + "timestamp": "2025-09-10 02:36:09.246739", + "step": 542, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:09.299215", + "step": 542, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02043714001774788, + "timestamp": "2025-09-10 02:36:09.301507", + "step": 543, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:36:09.367299", + "step": 543, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0241412166506052, + "timestamp": "2025-09-10 02:36:09.380322", + "step": 544, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:09.431888", + "step": 544, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036015719175338745, + "timestamp": "2025-09-10 02:36:09.434069", + "step": 545, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 560 + ], + "flops": 11200068058304.0 + }, + "timestamp": "2025-09-10 02:36:09.518501", + "step": 545, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011378358118236065, + "timestamp": "2025-09-10 02:36:09.533910", + "step": 546, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:09.587319", + "step": 546, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011544203385710716, + "timestamp": "2025-09-10 02:36:09.589563", + "step": 547, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:09.642433", + "step": 547, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007249454967677593, + "timestamp": "2025-09-10 02:36:09.649910", + "step": 548, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:09.702271", + "step": 548, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031287916004657745, + "timestamp": "2025-09-10 02:36:09.704461", + "step": 549, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:09.757222", + "step": 549, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006979082711040974, + "timestamp": "2025-09-10 02:36:09.759554", + "step": 550, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:09.813347", + "step": 550, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018987594172358513, + "timestamp": "2025-09-10 02:36:09.823181", + "step": 551, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:09.877032", + "step": 551, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01209915429353714, + "timestamp": "2025-09-10 02:36:09.887450", + "step": 552, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:09.939490", + "step": 552, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005028535611927509, + "timestamp": "2025-09-10 02:36:09.941459", + "step": 553, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:09.994551", + "step": 553, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015660250559449196, + "timestamp": "2025-09-10 02:36:10.004155", + "step": 554, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:10.056382", + "step": 554, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009114133194088936, + "timestamp": "2025-09-10 02:36:10.058550", + "step": 555, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:10.111047", + "step": 555, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00943037960678339, + "timestamp": "2025-09-10 02:36:10.116916", + "step": 556, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:10.168778", + "step": 556, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02232159674167633, + "timestamp": "2025-09-10 02:36:10.170956", + "step": 557, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:36:10.232156", + "step": 557, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010626793839037418, + "timestamp": "2025-09-10 02:36:10.243225", + "step": 558, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:10.295664", + "step": 558, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015199241228401661, + "timestamp": "2025-09-10 02:36:10.297864", + "step": 559, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:10.350348", + "step": 559, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011815892532467842, + "timestamp": "2025-09-10 02:36:10.356200", + "step": 560, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:10.407943", + "step": 560, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024737147614359856, + "timestamp": "2025-09-10 02:36:10.409877", + "step": 561, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:10.462886", + "step": 561, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006226534489542246, + "timestamp": "2025-09-10 02:36:10.472481", + "step": 562, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:36:10.533053", + "step": 562, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003087083576247096, + "timestamp": "2025-09-10 02:36:10.543985", + "step": 563, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:10.596768", + "step": 563, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024480143561959267, + "timestamp": "2025-09-10 02:36:10.602552", + "step": 564, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:36:10.655483", + "step": 564, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008550086989998817, + "timestamp": "2025-09-10 02:36:10.657533", + "step": 565, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:10.710065", + "step": 565, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007321577053517103, + "timestamp": "2025-09-10 02:36:10.718065", + "step": 566, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:10.770682", + "step": 566, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005716038402169943, + "timestamp": "2025-09-10 02:36:10.772740", + "step": 567, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:36:10.825461", + "step": 567, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009402218274772167, + "timestamp": "2025-09-10 02:36:10.831256", + "step": 568, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:10.883109", + "step": 568, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023248685523867607, + "timestamp": "2025-09-10 02:36:10.885066", + "step": 569, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:10.937806", + "step": 569, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0069808135740458965, + "timestamp": "2025-09-10 02:36:10.944057", + "step": 570, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:10.997112", + "step": 570, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022818030789494514, + "timestamp": "2025-09-10 02:36:10.999517", + "step": 571, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:11.052966", + "step": 571, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014487753622233868, + "timestamp": "2025-09-10 02:36:11.058594", + "step": 572, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:36:11.133095", + "step": 572, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018413782119750977, + "timestamp": "2025-09-10 02:36:11.147668", + "step": 573, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:11.202679", + "step": 573, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01510600745677948, + "timestamp": "2025-09-10 02:36:11.212444", + "step": 574, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:11.265578", + "step": 574, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010426660999655724, + "timestamp": "2025-09-10 02:36:11.267657", + "step": 575, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:11.321024", + "step": 575, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019595544785261154, + "timestamp": "2025-09-10 02:36:11.326608", + "step": 576, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:11.378933", + "step": 576, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017996158450841904, + "timestamp": "2025-09-10 02:36:11.386774", + "step": 577, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:36:11.444891", + "step": 577, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046230610460042953, + "timestamp": "2025-09-10 02:36:11.455309", + "step": 578, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:11.508247", + "step": 578, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010992661118507385, + "timestamp": "2025-09-10 02:36:11.510158", + "step": 579, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:11.562935", + "step": 579, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0033359068911522627, + "timestamp": "2025-09-10 02:36:11.570313", + "step": 580, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:11.622488", + "step": 580, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014831820502877235, + "timestamp": "2025-09-10 02:36:11.625324", + "step": 581, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:11.677671", + "step": 581, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04670983552932739, + "timestamp": "2025-09-10 02:36:11.679904", + "step": 582, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:11.732581", + "step": 582, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027669671922922134, + "timestamp": "2025-09-10 02:36:11.734695", + "step": 583, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:11.788004", + "step": 583, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021408479660749435, + "timestamp": "2025-09-10 02:36:11.798623", + "step": 584, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:11.850912", + "step": 584, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0021256650798022747, + "timestamp": "2025-09-10 02:36:11.853135", + "step": 585, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:11.905403", + "step": 585, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01679086498916149, + "timestamp": "2025-09-10 02:36:11.913766", + "step": 586, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:11.966122", + "step": 586, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02040312997996807, + "timestamp": "2025-09-10 02:36:11.968181", + "step": 587, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:12.020430", + "step": 587, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004376889206469059, + "timestamp": "2025-09-10 02:36:12.026033", + "step": 588, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:36:28.785153", + "step": 588, + "epoch": 1 + }, + { + "type": "pplx", + "content": 23128982.936499126, + "timestamp": "2025-09-10 02:36:28.787845", + "step": 588, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 816 + ], + "flops": 16320099139776.0 + }, + "timestamp": "2025-09-10 02:36:28.906038", + "step": 588, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007238972466439009, + "timestamp": "2025-09-10 02:36:28.931302", + "step": 589, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:28.986499", + "step": 589, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016987601295113564, + "timestamp": "2025-09-10 02:36:28.988792", + "step": 590, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:29.042108", + "step": 590, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015001409687101841, + "timestamp": "2025-09-10 02:36:29.047805", + "step": 591, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:29.102058", + "step": 591, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009109629318118095, + "timestamp": "2025-09-10 02:36:29.112440", + "step": 592, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:29.165068", + "step": 592, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004366945009678602, + "timestamp": "2025-09-10 02:36:29.167066", + "step": 593, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:29.219664", + "step": 593, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007493204902857542, + "timestamp": "2025-09-10 02:36:29.222522", + "step": 594, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:29.275397", + "step": 594, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01290935929864645, + "timestamp": "2025-09-10 02:36:29.277549", + "step": 595, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:36:29.339415", + "step": 595, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011629750020802021, + "timestamp": "2025-09-10 02:36:29.351296", + "step": 596, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:29.404816", + "step": 596, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009904067032039165, + "timestamp": "2025-09-10 02:36:29.407001", + "step": 597, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:29.460019", + "step": 597, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0404568575322628, + "timestamp": "2025-09-10 02:36:29.462404", + "step": 598, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:29.515106", + "step": 598, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012372517958283424, + "timestamp": "2025-09-10 02:36:29.517860", + "step": 599, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:29.571671", + "step": 599, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0342080220580101, + "timestamp": "2025-09-10 02:36:29.578062", + "step": 600, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:29.630565", + "step": 600, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018423749133944511, + "timestamp": "2025-09-10 02:36:29.632374", + "step": 601, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:29.685412", + "step": 601, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020080851390957832, + "timestamp": "2025-09-10 02:36:29.687488", + "step": 602, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:29.740324", + "step": 602, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014774338342249393, + "timestamp": "2025-09-10 02:36:29.749925", + "step": 603, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:29.804789", + "step": 603, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05874253436923027, + "timestamp": "2025-09-10 02:36:29.815332", + "step": 604, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:36:29.875246", + "step": 604, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0313432440161705, + "timestamp": "2025-09-10 02:36:29.887074", + "step": 605, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:29.940181", + "step": 605, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00326648005284369, + "timestamp": "2025-09-10 02:36:29.942425", + "step": 606, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:29.995357", + "step": 606, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009223191998898983, + "timestamp": "2025-09-10 02:36:30.003407", + "step": 607, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:30.056785", + "step": 607, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0024524382315576077, + "timestamp": "2025-09-10 02:36:30.063048", + "step": 608, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:30.115230", + "step": 608, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004639843013137579, + "timestamp": "2025-09-10 02:36:30.117350", + "step": 609, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:30.170184", + "step": 609, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00964688416570425, + "timestamp": "2025-09-10 02:36:30.172336", + "step": 610, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:30.224931", + "step": 610, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004201619885861874, + "timestamp": "2025-09-10 02:36:30.227238", + "step": 611, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:30.280871", + "step": 611, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028701484203338623, + "timestamp": "2025-09-10 02:36:30.287819", + "step": 612, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:36:30.344675", + "step": 612, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010117202997207642, + "timestamp": "2025-09-10 02:36:30.355887", + "step": 613, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:30.410187", + "step": 613, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00475263362750411, + "timestamp": "2025-09-10 02:36:30.418951", + "step": 614, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:30.472571", + "step": 614, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022213634103536606, + "timestamp": "2025-09-10 02:36:30.474783", + "step": 615, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:36:30.534626", + "step": 615, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006725195329636335, + "timestamp": "2025-09-10 02:36:30.546163", + "step": 616, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:30.598424", + "step": 616, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008890500292181969, + "timestamp": "2025-09-10 02:36:30.600352", + "step": 617, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:30.652887", + "step": 617, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002376921707764268, + "timestamp": "2025-09-10 02:36:30.655119", + "step": 618, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:30.707669", + "step": 618, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0235095527023077, + "timestamp": "2025-09-10 02:36:30.710541", + "step": 619, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:30.763039", + "step": 619, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01089462824165821, + "timestamp": "2025-09-10 02:36:30.770349", + "step": 620, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:30.822781", + "step": 620, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015000701881945133, + "timestamp": "2025-09-10 02:36:30.830859", + "step": 621, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:30.883674", + "step": 621, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001055436092428863, + "timestamp": "2025-09-10 02:36:30.885781", + "step": 622, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:30.937819", + "step": 622, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0067395372316241264, + "timestamp": "2025-09-10 02:36:30.940756", + "step": 623, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:36:31.000356", + "step": 623, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005097785033285618, + "timestamp": "2025-09-10 02:36:31.006103", + "step": 624, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:31.058373", + "step": 624, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02528545819222927, + "timestamp": "2025-09-10 02:36:31.060384", + "step": 625, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:31.112756", + "step": 625, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008685345761477947, + "timestamp": "2025-09-10 02:36:31.114953", + "step": 626, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:31.168616", + "step": 626, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004012659192085266, + "timestamp": "2025-09-10 02:36:31.171007", + "step": 627, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:31.223491", + "step": 627, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01806785725057125, + "timestamp": "2025-09-10 02:36:31.230736", + "step": 628, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:31.283277", + "step": 628, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03684353828430176, + "timestamp": "2025-09-10 02:36:31.285235", + "step": 629, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:31.338120", + "step": 629, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03313937410712242, + "timestamp": "2025-09-10 02:36:31.347733", + "step": 630, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:31.400127", + "step": 630, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006307187490165234, + "timestamp": "2025-09-10 02:36:31.402508", + "step": 631, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:31.455215", + "step": 631, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0016396130667999387, + "timestamp": "2025-09-10 02:36:31.461031", + "step": 632, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:31.516894", + "step": 632, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017854390665888786, + "timestamp": "2025-09-10 02:36:31.518888", + "step": 633, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:31.571519", + "step": 633, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05947822704911232, + "timestamp": "2025-09-10 02:36:31.573435", + "step": 634, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:31.627211", + "step": 634, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06303369253873825, + "timestamp": "2025-09-10 02:36:31.629467", + "step": 635, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:31.682807", + "step": 635, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010691377334296703, + "timestamp": "2025-09-10 02:36:31.693178", + "step": 636, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:31.745800", + "step": 636, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06338184326887131, + "timestamp": "2025-09-10 02:36:31.747771", + "step": 637, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:36:31.800004", + "step": 637, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03403312340378761, + "timestamp": "2025-09-10 02:36:31.802144", + "step": 638, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:31.855178", + "step": 638, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009687677025794983, + "timestamp": "2025-09-10 02:36:31.857290", + "step": 639, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:31.911472", + "step": 639, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0017506403382867575, + "timestamp": "2025-09-10 02:36:31.922013", + "step": 640, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:31.974950", + "step": 640, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02189011499285698, + "timestamp": "2025-09-10 02:36:31.985455", + "step": 641, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:32.038278", + "step": 641, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02935587801039219, + "timestamp": "2025-09-10 02:36:32.042004", + "step": 642, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:32.097238", + "step": 642, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026329005137085915, + "timestamp": "2025-09-10 02:36:32.099303", + "step": 643, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:32.152285", + "step": 643, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03041278012096882, + "timestamp": "2025-09-10 02:36:32.162696", + "step": 644, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:32.214655", + "step": 644, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00822833739221096, + "timestamp": "2025-09-10 02:36:32.216880", + "step": 645, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:32.269165", + "step": 645, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013744184747338295, + "timestamp": "2025-09-10 02:36:32.279979", + "step": 646, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:32.334900", + "step": 646, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019500361755490303, + "timestamp": "2025-09-10 02:36:32.337562", + "step": 647, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:32.390147", + "step": 647, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0037625690456479788, + "timestamp": "2025-09-10 02:36:32.396092", + "step": 648, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:32.448397", + "step": 648, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011067330837249756, + "timestamp": "2025-09-10 02:36:32.450449", + "step": 649, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:36:32.516392", + "step": 649, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022867491468787193, + "timestamp": "2025-09-10 02:36:32.528555", + "step": 650, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:32.582179", + "step": 650, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003159865038469434, + "timestamp": "2025-09-10 02:36:32.584348", + "step": 651, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:32.636630", + "step": 651, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009259316138923168, + "timestamp": "2025-09-10 02:36:32.642416", + "step": 652, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:36:32.701591", + "step": 652, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020939616486430168, + "timestamp": "2025-09-10 02:36:32.713354", + "step": 653, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:32.766577", + "step": 653, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033403415232896805, + "timestamp": "2025-09-10 02:36:32.768710", + "step": 654, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:32.821144", + "step": 654, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013647467829287052, + "timestamp": "2025-09-10 02:36:32.827577", + "step": 655, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:32.881267", + "step": 655, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006164844613522291, + "timestamp": "2025-09-10 02:36:32.891662", + "step": 656, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:32.944341", + "step": 656, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01957377977669239, + "timestamp": "2025-09-10 02:36:32.954831", + "step": 657, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:33.007542", + "step": 657, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04672471061348915, + "timestamp": "2025-09-10 02:36:33.009473", + "step": 658, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:33.061937", + "step": 658, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016188517212867737, + "timestamp": "2025-09-10 02:36:33.064534", + "step": 659, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:33.116889", + "step": 659, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04000651836395264, + "timestamp": "2025-09-10 02:36:33.122853", + "step": 660, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:33.175080", + "step": 660, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01387543324381113, + "timestamp": "2025-09-10 02:36:33.177065", + "step": 661, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:33.229910", + "step": 661, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01652139239013195, + "timestamp": "2025-09-10 02:36:33.239781", + "step": 662, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:33.292797", + "step": 662, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021793778985738754, + "timestamp": "2025-09-10 02:36:33.306016", + "step": 663, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:33.365074", + "step": 663, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016380032524466515, + "timestamp": "2025-09-10 02:36:33.375489", + "step": 664, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:33.435226", + "step": 664, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009158218279480934, + "timestamp": "2025-09-10 02:36:33.438264", + "step": 665, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:33.496788", + "step": 665, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029515789821743965, + "timestamp": "2025-09-10 02:36:33.499204", + "step": 666, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:33.558996", + "step": 666, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02951805293560028, + "timestamp": "2025-09-10 02:36:33.566622", + "step": 667, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:33.628662", + "step": 667, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00808575190603733, + "timestamp": "2025-09-10 02:36:33.635127", + "step": 668, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:33.689360", + "step": 668, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017804021015763283, + "timestamp": "2025-09-10 02:36:33.691517", + "step": 669, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:33.746033", + "step": 669, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035620737820863724, + "timestamp": "2025-09-10 02:36:33.751120", + "step": 670, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:33.805083", + "step": 670, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009280617348849773, + "timestamp": "2025-09-10 02:36:33.809737", + "step": 671, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:33.871574", + "step": 671, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009552262723445892, + "timestamp": "2025-09-10 02:36:33.882756", + "step": 672, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:33.943019", + "step": 672, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017052967101335526, + "timestamp": "2025-09-10 02:36:33.945794", + "step": 673, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:36:34.019672", + "step": 673, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034075286239385605, + "timestamp": "2025-09-10 02:36:34.032318", + "step": 674, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:34.102378", + "step": 674, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019345467910170555, + "timestamp": "2025-09-10 02:36:34.111256", + "step": 675, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:36:34.166556", + "step": 675, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015054549090564251, + "timestamp": "2025-09-10 02:36:34.173281", + "step": 676, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:34.243342", + "step": 676, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009510613977909088, + "timestamp": "2025-09-10 02:36:34.246974", + "step": 677, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:34.301564", + "step": 677, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027910476550459862, + "timestamp": "2025-09-10 02:36:34.305992", + "step": 678, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:34.362296", + "step": 678, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00868389755487442, + "timestamp": "2025-09-10 02:36:34.364412", + "step": 679, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:36:34.438507", + "step": 679, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01145507674664259, + "timestamp": "2025-09-10 02:36:34.453410", + "step": 680, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:34.521391", + "step": 680, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01435221266001463, + "timestamp": "2025-09-10 02:36:34.531793", + "step": 681, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:34.585549", + "step": 681, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008981848135590553, + "timestamp": "2025-09-10 02:36:34.591528", + "step": 682, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:34.659149", + "step": 682, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02495545521378517, + "timestamp": "2025-09-10 02:36:34.661918", + "step": 683, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:34.715720", + "step": 683, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02103118598461151, + "timestamp": "2025-09-10 02:36:34.722918", + "step": 684, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:36:34.789236", + "step": 684, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022110581398010254, + "timestamp": "2025-09-10 02:36:34.802428", + "step": 685, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:34.870682", + "step": 685, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026706701144576073, + "timestamp": "2025-09-10 02:36:34.873551", + "step": 686, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:34.927608", + "step": 686, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019403820857405663, + "timestamp": "2025-09-10 02:36:34.929951", + "step": 687, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:34.987799", + "step": 687, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015205049887299538, + "timestamp": "2025-09-10 02:36:35.003554", + "step": 688, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:35.064158", + "step": 688, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025768402963876724, + "timestamp": "2025-09-10 02:36:35.073503", + "step": 689, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:35.130980", + "step": 689, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014641908928751945, + "timestamp": "2025-09-10 02:36:35.140619", + "step": 690, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:35.237956", + "step": 690, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015722578391432762, + "timestamp": "2025-09-10 02:36:35.243068", + "step": 691, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:35.304138", + "step": 691, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012852429412305355, + "timestamp": "2025-09-10 02:36:35.310116", + "step": 692, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:35.363036", + "step": 692, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0215513464063406, + "timestamp": "2025-09-10 02:36:35.364999", + "step": 693, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:35.417127", + "step": 693, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009072760120034218, + "timestamp": "2025-09-10 02:36:35.420032", + "step": 694, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:35.472778", + "step": 694, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01474462728947401, + "timestamp": "2025-09-10 02:36:35.475181", + "step": 695, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:36:35.544437", + "step": 695, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02467481978237629, + "timestamp": "2025-09-10 02:36:35.557932", + "step": 696, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:35.610313", + "step": 696, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007700375281274319, + "timestamp": "2025-09-10 02:36:35.618466", + "step": 697, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:35.672719", + "step": 697, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010380787774920464, + "timestamp": "2025-09-10 02:36:35.674748", + "step": 698, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:35.729032", + "step": 698, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02106519415974617, + "timestamp": "2025-09-10 02:36:35.731103", + "step": 699, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:35.785861", + "step": 699, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02622860297560692, + "timestamp": "2025-09-10 02:36:35.792038", + "step": 700, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:35.845270", + "step": 700, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013282016851007938, + "timestamp": "2025-09-10 02:36:35.855743", + "step": 701, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:35.909897", + "step": 701, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013056610710918903, + "timestamp": "2025-09-10 02:36:35.911874", + "step": 702, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:35.964540", + "step": 702, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02552831545472145, + "timestamp": "2025-09-10 02:36:35.972621", + "step": 703, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:36.025437", + "step": 703, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02205856330692768, + "timestamp": "2025-09-10 02:36:36.031114", + "step": 704, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:36.084678", + "step": 704, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00926896370947361, + "timestamp": "2025-09-10 02:36:36.095112", + "step": 705, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:36.147786", + "step": 705, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00949943345040083, + "timestamp": "2025-09-10 02:36:36.150059", + "step": 706, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:36.203676", + "step": 706, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014659402891993523, + "timestamp": "2025-09-10 02:36:36.212800", + "step": 707, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:36:36.270135", + "step": 707, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016407150775194168, + "timestamp": "2025-09-10 02:36:36.281343", + "step": 708, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:36.333802", + "step": 708, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014719000086188316, + "timestamp": "2025-09-10 02:36:36.335765", + "step": 709, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:36.389778", + "step": 709, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018781915307044983, + "timestamp": "2025-09-10 02:36:36.391976", + "step": 710, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:36:36.449608", + "step": 710, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01804245077073574, + "timestamp": "2025-09-10 02:36:36.460017", + "step": 711, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:36:36.513781", + "step": 711, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015068510547280312, + "timestamp": "2025-09-10 02:36:36.519883", + "step": 712, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:36.573010", + "step": 712, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017627691850066185, + "timestamp": "2025-09-10 02:36:36.578574", + "step": 713, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:36.632003", + "step": 713, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016136599704623222, + "timestamp": "2025-09-10 02:36:36.634254", + "step": 714, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:36.687693", + "step": 714, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009769621305167675, + "timestamp": "2025-09-10 02:36:36.689891", + "step": 715, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:36.742774", + "step": 715, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01921756938099861, + "timestamp": "2025-09-10 02:36:36.749819", + "step": 716, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:36:36.808788", + "step": 716, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006998592056334019, + "timestamp": "2025-09-10 02:36:36.820335", + "step": 717, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:36.872717", + "step": 717, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01364762894809246, + "timestamp": "2025-09-10 02:36:36.880834", + "step": 718, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:36:36.947103", + "step": 718, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0220673568546772, + "timestamp": "2025-09-10 02:36:36.959332", + "step": 719, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:37.012469", + "step": 719, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011288968846201897, + "timestamp": "2025-09-10 02:36:37.021549", + "step": 720, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:37.073860", + "step": 720, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005582908634096384, + "timestamp": "2025-09-10 02:36:37.076040", + "step": 721, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:37.128587", + "step": 721, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029105016961693764, + "timestamp": "2025-09-10 02:36:37.136904", + "step": 722, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:37.189685", + "step": 722, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015418858267366886, + "timestamp": "2025-09-10 02:36:37.191939", + "step": 723, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:37.244895", + "step": 723, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018920283764600754, + "timestamp": "2025-09-10 02:36:37.250667", + "step": 724, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:37.303815", + "step": 724, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029933378100395203, + "timestamp": "2025-09-10 02:36:37.305856", + "step": 725, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:37.358653", + "step": 725, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010641084052622318, + "timestamp": "2025-09-10 02:36:37.360840", + "step": 726, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:37.413621", + "step": 726, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019988510757684708, + "timestamp": "2025-09-10 02:36:37.415458", + "step": 727, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:37.468347", + "step": 727, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02792045660316944, + "timestamp": "2025-09-10 02:36:37.474448", + "step": 728, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:36:37.540184", + "step": 728, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0059188418090343475, + "timestamp": "2025-09-10 02:36:37.553811", + "step": 729, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:37.607463", + "step": 729, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013464928604662418, + "timestamp": "2025-09-10 02:36:37.609820", + "step": 730, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:37.663794", + "step": 730, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004084447864443064, + "timestamp": "2025-09-10 02:36:37.673635", + "step": 731, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:37.726595", + "step": 731, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01191288884729147, + "timestamp": "2025-09-10 02:36:37.732270", + "step": 732, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:37.784313", + "step": 732, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0049387919716537, + "timestamp": "2025-09-10 02:36:37.792278", + "step": 733, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:37.844978", + "step": 733, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011774830520153046, + "timestamp": "2025-09-10 02:36:37.846891", + "step": 734, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:37.899891", + "step": 734, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024851668626070023, + "timestamp": "2025-09-10 02:36:37.901835", + "step": 735, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:36:54.667663", + "step": 735, + "epoch": 1 + }, + { + "type": "pplx", + "content": 21917958.077079695, + "timestamp": "2025-09-10 02:36:54.670400", + "step": 735, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:36:54.727380", + "step": 735, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020502442494034767, + "timestamp": "2025-09-10 02:36:54.738502", + "step": 736, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:36:54.803573", + "step": 736, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015638506039977074, + "timestamp": "2025-09-10 02:36:54.816701", + "step": 737, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:54.870020", + "step": 737, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00676635792478919, + "timestamp": "2025-09-10 02:36:54.871943", + "step": 738, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:54.924485", + "step": 738, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008608072064816952, + "timestamp": "2025-09-10 02:36:54.927329", + "step": 739, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:36:54.979296", + "step": 739, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00817218329757452, + "timestamp": "2025-09-10 02:36:54.985279", + "step": 740, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:55.037487", + "step": 740, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023427298292517662, + "timestamp": "2025-09-10 02:36:55.039930", + "step": 741, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:55.093074", + "step": 741, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027461376041173935, + "timestamp": "2025-09-10 02:36:55.095209", + "step": 742, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:55.147794", + "step": 742, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0030104925390332937, + "timestamp": "2025-09-10 02:36:55.149737", + "step": 743, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:55.202263", + "step": 743, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0279100202023983, + "timestamp": "2025-09-10 02:36:55.208136", + "step": 744, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:55.260953", + "step": 744, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009477927349507809, + "timestamp": "2025-09-10 02:36:55.262916", + "step": 745, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:55.317341", + "step": 745, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021144181489944458, + "timestamp": "2025-09-10 02:36:55.327145", + "step": 746, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:55.380394", + "step": 746, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0071835569106042385, + "timestamp": "2025-09-10 02:36:55.382474", + "step": 747, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:55.435171", + "step": 747, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015094847418367863, + "timestamp": "2025-09-10 02:36:55.442413", + "step": 748, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:55.494682", + "step": 748, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014481599442660809, + "timestamp": "2025-09-10 02:36:55.497028", + "step": 749, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:36:55.554558", + "step": 749, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0032352295238524675, + "timestamp": "2025-09-10 02:36:55.564980", + "step": 750, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:55.619465", + "step": 750, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008319184184074402, + "timestamp": "2025-09-10 02:36:55.621413", + "step": 751, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:55.673805", + "step": 751, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001629153499379754, + "timestamp": "2025-09-10 02:36:55.679541", + "step": 752, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:55.732181", + "step": 752, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013639301992952824, + "timestamp": "2025-09-10 02:36:55.740543", + "step": 753, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:55.793714", + "step": 753, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0051920460537076, + "timestamp": "2025-09-10 02:36:55.802009", + "step": 754, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:55.855478", + "step": 754, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0006103878258727491, + "timestamp": "2025-09-10 02:36:55.858301", + "step": 755, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:55.911629", + "step": 755, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014334793202579021, + "timestamp": "2025-09-10 02:36:55.917623", + "step": 756, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:55.969951", + "step": 756, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01660270243883133, + "timestamp": "2025-09-10 02:36:55.971904", + "step": 757, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:36:56.045152", + "step": 757, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03916054591536522, + "timestamp": "2025-09-10 02:36:56.058862", + "step": 758, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:56.113331", + "step": 758, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.057445406913757324, + "timestamp": "2025-09-10 02:36:56.123151", + "step": 759, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:56.177670", + "step": 759, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04359531030058861, + "timestamp": "2025-09-10 02:36:56.183533", + "step": 760, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:56.234980", + "step": 760, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016469566151499748, + "timestamp": "2025-09-10 02:36:56.237128", + "step": 761, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:56.291946", + "step": 761, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021483054384589195, + "timestamp": "2025-09-10 02:36:56.301716", + "step": 762, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:36:56.362917", + "step": 762, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007063302677124739, + "timestamp": "2025-09-10 02:36:56.373986", + "step": 763, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:56.427233", + "step": 763, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007045641075819731, + "timestamp": "2025-09-10 02:36:56.434308", + "step": 764, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:56.486772", + "step": 764, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035644493997097015, + "timestamp": "2025-09-10 02:36:56.488986", + "step": 765, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:56.541941", + "step": 765, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009285603649914265, + "timestamp": "2025-09-10 02:36:56.544150", + "step": 766, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:36:56.610635", + "step": 766, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0031007337383925915, + "timestamp": "2025-09-10 02:36:56.622836", + "step": 767, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:56.675781", + "step": 767, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009259874001145363, + "timestamp": "2025-09-10 02:36:56.681806", + "step": 768, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:36:56.733740", + "step": 768, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01143709011375904, + "timestamp": "2025-09-10 02:36:56.736082", + "step": 769, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:56.789671", + "step": 769, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017447659745812416, + "timestamp": "2025-09-10 02:36:56.799278", + "step": 770, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:56.852742", + "step": 770, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01954110898077488, + "timestamp": "2025-09-10 02:36:56.854921", + "step": 771, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:56.907448", + "step": 771, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016713453456759453, + "timestamp": "2025-09-10 02:36:56.913290", + "step": 772, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:56.965105", + "step": 772, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019992290064692497, + "timestamp": "2025-09-10 02:36:56.975387", + "step": 773, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:57.028370", + "step": 773, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00534540181979537, + "timestamp": "2025-09-10 02:36:57.030486", + "step": 774, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:57.083622", + "step": 774, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010700692422688007, + "timestamp": "2025-09-10 02:36:57.085776", + "step": 775, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:57.138182", + "step": 775, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03621980547904968, + "timestamp": "2025-09-10 02:36:57.144186", + "step": 776, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:57.196717", + "step": 776, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01841093972325325, + "timestamp": "2025-09-10 02:36:57.198808", + "step": 777, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:57.251782", + "step": 777, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009724624454975128, + "timestamp": "2025-09-10 02:36:57.259937", + "step": 778, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:57.312696", + "step": 778, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005586319603025913, + "timestamp": "2025-09-10 02:36:57.314785", + "step": 779, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:57.368462", + "step": 779, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012120272032916546, + "timestamp": "2025-09-10 02:36:57.374242", + "step": 780, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:57.426180", + "step": 780, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0129274632781744, + "timestamp": "2025-09-10 02:36:57.429033", + "step": 781, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:36:57.489694", + "step": 781, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014876616187393665, + "timestamp": "2025-09-10 02:36:57.500443", + "step": 782, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:57.554161", + "step": 782, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014843541197478771, + "timestamp": "2025-09-10 02:36:57.556520", + "step": 783, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:36:57.624567", + "step": 783, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004676350392401218, + "timestamp": "2025-09-10 02:36:57.637826", + "step": 784, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:36:57.691802", + "step": 784, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005307146813720465, + "timestamp": "2025-09-10 02:36:57.701408", + "step": 785, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:36:57.768054", + "step": 785, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03211702033877373, + "timestamp": "2025-09-10 02:36:57.780244", + "step": 786, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:57.833669", + "step": 786, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04485854133963585, + "timestamp": "2025-09-10 02:36:57.835935", + "step": 787, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:36:57.896789", + "step": 787, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007537003606557846, + "timestamp": "2025-09-10 02:36:57.908482", + "step": 788, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:57.961411", + "step": 788, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003232221584767103, + "timestamp": "2025-09-10 02:36:57.963679", + "step": 789, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:58.016910", + "step": 789, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02600998990237713, + "timestamp": "2025-09-10 02:36:58.019180", + "step": 790, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:36:58.100801", + "step": 790, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01314751710742712, + "timestamp": "2025-09-10 02:36:58.115804", + "step": 791, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:58.169976", + "step": 791, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011830405332148075, + "timestamp": "2025-09-10 02:36:58.175921", + "step": 792, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:58.228540", + "step": 792, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01234391424804926, + "timestamp": "2025-09-10 02:36:58.230782", + "step": 793, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:58.283727", + "step": 793, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02767784334719181, + "timestamp": "2025-09-10 02:36:58.285952", + "step": 794, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:58.338954", + "step": 794, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014593282248824835, + "timestamp": "2025-09-10 02:36:58.341214", + "step": 795, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:36:58.399241", + "step": 795, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047555647790431976, + "timestamp": "2025-09-10 02:36:58.410435", + "step": 796, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:36:58.469602", + "step": 796, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01425406988710165, + "timestamp": "2025-09-10 02:36:58.481158", + "step": 797, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:58.534786", + "step": 797, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005846301559358835, + "timestamp": "2025-09-10 02:36:58.536794", + "step": 798, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:58.589682", + "step": 798, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02180892787873745, + "timestamp": "2025-09-10 02:36:58.591800", + "step": 799, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:36:58.645066", + "step": 799, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01302596740424633, + "timestamp": "2025-09-10 02:36:58.651430", + "step": 800, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:36:58.704127", + "step": 800, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016773927956819534, + "timestamp": "2025-09-10 02:36:58.706882", + "step": 801, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:36:58.760906", + "step": 801, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004892075899988413, + "timestamp": "2025-09-10 02:36:58.770673", + "step": 802, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:36:58.824302", + "step": 802, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002840865170583129, + "timestamp": "2025-09-10 02:36:58.826350", + "step": 803, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:36:58.886195", + "step": 803, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018173635005950928, + "timestamp": "2025-09-10 02:36:58.897720", + "step": 804, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:58.950440", + "step": 804, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025356922298669815, + "timestamp": "2025-09-10 02:36:58.952766", + "step": 805, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:36:59.006043", + "step": 805, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0071188295260071754, + "timestamp": "2025-09-10 02:36:59.013674", + "step": 806, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:59.066723", + "step": 806, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0071905506774783134, + "timestamp": "2025-09-10 02:36:59.068774", + "step": 807, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:59.121937", + "step": 807, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010201232507824898, + "timestamp": "2025-09-10 02:36:59.128204", + "step": 808, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:59.181164", + "step": 808, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0006716384668834507, + "timestamp": "2025-09-10 02:36:59.183382", + "step": 809, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:36:59.237100", + "step": 809, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009294861927628517, + "timestamp": "2025-09-10 02:36:59.239346", + "step": 810, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:36:59.292794", + "step": 810, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009038268588483334, + "timestamp": "2025-09-10 02:36:59.295129", + "step": 811, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:36:59.355506", + "step": 811, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01586053892970085, + "timestamp": "2025-09-10 02:36:59.367026", + "step": 812, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:36:59.419974", + "step": 812, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01941845752298832, + "timestamp": "2025-09-10 02:36:59.422362", + "step": 813, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:36:59.476908", + "step": 813, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008595914579927921, + "timestamp": "2025-09-10 02:36:59.479281", + "step": 814, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:59.533158", + "step": 814, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002296484773978591, + "timestamp": "2025-09-10 02:36:59.539035", + "step": 815, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:59.593264", + "step": 815, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020253313705325127, + "timestamp": "2025-09-10 02:36:59.599671", + "step": 816, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:36:59.659221", + "step": 816, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010540309362113476, + "timestamp": "2025-09-10 02:36:59.670736", + "step": 817, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:36:59.728584", + "step": 817, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022805223241448402, + "timestamp": "2025-09-10 02:36:59.739002", + "step": 818, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:36:59.792947", + "step": 818, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03727426007390022, + "timestamp": "2025-09-10 02:36:59.798505", + "step": 819, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:36:59.851574", + "step": 819, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03954698517918587, + "timestamp": "2025-09-10 02:36:59.858025", + "step": 820, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:36:59.910942", + "step": 820, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020105760544538498, + "timestamp": "2025-09-10 02:36:59.913253", + "step": 821, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:36:59.971654", + "step": 821, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06767837703227997, + "timestamp": "2025-09-10 02:36:59.974389", + "step": 822, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:37:00.031750", + "step": 822, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0019293931545689702, + "timestamp": "2025-09-10 02:37:00.033880", + "step": 823, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:00.093895", + "step": 823, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0019679716788232327, + "timestamp": "2025-09-10 02:37:00.100305", + "step": 824, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:00.244698", + "step": 824, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011093164794147015, + "timestamp": "2025-09-10 02:37:00.250234", + "step": 825, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:00.303379", + "step": 825, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039194487035274506, + "timestamp": "2025-09-10 02:37:00.305835", + "step": 826, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:00.359015", + "step": 826, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05020434036850929, + "timestamp": "2025-09-10 02:37:00.361245", + "step": 827, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:00.413939", + "step": 827, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006449654698371887, + "timestamp": "2025-09-10 02:37:00.420116", + "step": 828, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:00.472686", + "step": 828, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007012534886598587, + "timestamp": "2025-09-10 02:37:00.482887", + "step": 829, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:00.535772", + "step": 829, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01749882847070694, + "timestamp": "2025-09-10 02:37:00.538787", + "step": 830, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:00.591577", + "step": 830, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016114160418510437, + "timestamp": "2025-09-10 02:37:00.593718", + "step": 831, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:37:00.652188", + "step": 831, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028149515390396118, + "timestamp": "2025-09-10 02:37:00.662795", + "step": 832, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:00.715253", + "step": 832, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014135974459350109, + "timestamp": "2025-09-10 02:37:00.721502", + "step": 833, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:00.775531", + "step": 833, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03076109290122986, + "timestamp": "2025-09-10 02:37:00.777582", + "step": 834, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:00.830923", + "step": 834, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03094400279223919, + "timestamp": "2025-09-10 02:37:00.838245", + "step": 835, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:00.891325", + "step": 835, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022063499316573143, + "timestamp": "2025-09-10 02:37:00.897401", + "step": 836, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:00.949343", + "step": 836, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01406343374401331, + "timestamp": "2025-09-10 02:37:00.959603", + "step": 837, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:37:01.027564", + "step": 837, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03292260691523552, + "timestamp": "2025-09-10 02:37:01.040119", + "step": 838, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:01.097583", + "step": 838, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011248174123466015, + "timestamp": "2025-09-10 02:37:01.099885", + "step": 839, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:01.152435", + "step": 839, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02245539426803589, + "timestamp": "2025-09-10 02:37:01.158608", + "step": 840, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:37:01.230457", + "step": 840, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02683345414698124, + "timestamp": "2025-09-10 02:37:01.245446", + "step": 841, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:01.298475", + "step": 841, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011874644085764885, + "timestamp": "2025-09-10 02:37:01.300995", + "step": 842, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:01.353922", + "step": 842, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01341636199504137, + "timestamp": "2025-09-10 02:37:01.356022", + "step": 843, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:01.409223", + "step": 843, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012613373808562756, + "timestamp": "2025-09-10 02:37:01.415037", + "step": 844, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:37:01.481579", + "step": 844, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019389091059565544, + "timestamp": "2025-09-10 02:37:01.495360", + "step": 845, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:37:01.555560", + "step": 845, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004535573069006205, + "timestamp": "2025-09-10 02:37:01.566289", + "step": 846, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:37:01.627593", + "step": 846, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018833627924323082, + "timestamp": "2025-09-10 02:37:01.638696", + "step": 847, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:01.693159", + "step": 847, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027462048456072807, + "timestamp": "2025-09-10 02:37:01.699095", + "step": 848, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:01.753480", + "step": 848, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01780627854168415, + "timestamp": "2025-09-10 02:37:01.756085", + "step": 849, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:01.808895", + "step": 849, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008407698012888432, + "timestamp": "2025-09-10 02:37:01.816648", + "step": 850, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:01.869218", + "step": 850, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006790136452764273, + "timestamp": "2025-09-10 02:37:01.871042", + "step": 851, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:01.923983", + "step": 851, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0250264760106802, + "timestamp": "2025-09-10 02:37:01.931083", + "step": 852, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:37:02.001951", + "step": 852, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016610300168395042, + "timestamp": "2025-09-10 02:37:02.016916", + "step": 853, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:37:02.087453", + "step": 853, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011690507642924786, + "timestamp": "2025-09-10 02:37:02.100061", + "step": 854, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:02.153665", + "step": 854, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007542649749666452, + "timestamp": "2025-09-10 02:37:02.155574", + "step": 855, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:02.208037", + "step": 855, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006887788884341717, + "timestamp": "2025-09-10 02:37:02.214984", + "step": 856, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:02.269234", + "step": 856, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01489509828388691, + "timestamp": "2025-09-10 02:37:02.279187", + "step": 857, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:02.331911", + "step": 857, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01974819228053093, + "timestamp": "2025-09-10 02:37:02.334767", + "step": 858, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:02.388192", + "step": 858, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016354762017726898, + "timestamp": "2025-09-10 02:37:02.392577", + "step": 859, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:37:02.454075", + "step": 859, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014117122627794743, + "timestamp": "2025-09-10 02:37:02.465955", + "step": 860, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:37:02.522242", + "step": 860, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007971592247486115, + "timestamp": "2025-09-10 02:37:02.533471", + "step": 861, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:02.586745", + "step": 861, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009552216157317162, + "timestamp": "2025-09-10 02:37:02.589362", + "step": 862, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:02.644787", + "step": 862, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01481309998780489, + "timestamp": "2025-09-10 02:37:02.647258", + "step": 863, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:02.700522", + "step": 863, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017297828570008278, + "timestamp": "2025-09-10 02:37:02.712358", + "step": 864, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:02.785195", + "step": 864, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011291074566543102, + "timestamp": "2025-09-10 02:37:02.793106", + "step": 865, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:02.846382", + "step": 865, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026969680562615395, + "timestamp": "2025-09-10 02:37:02.848743", + "step": 866, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:02.901452", + "step": 866, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018329216167330742, + "timestamp": "2025-09-10 02:37:02.905913", + "step": 867, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:02.960839", + "step": 867, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007551748771220446, + "timestamp": "2025-09-10 02:37:02.966869", + "step": 868, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:03.027071", + "step": 868, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005394156090915203, + "timestamp": "2025-09-10 02:37:03.030241", + "step": 869, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:03.086559", + "step": 869, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005307991988956928, + "timestamp": "2025-09-10 02:37:03.088785", + "step": 870, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:03.145690", + "step": 870, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027891067788004875, + "timestamp": "2025-09-10 02:37:03.148665", + "step": 871, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:03.201190", + "step": 871, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010209517553448677, + "timestamp": "2025-09-10 02:37:03.207287", + "step": 872, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:03.264926", + "step": 872, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0062334248796105385, + "timestamp": "2025-09-10 02:37:03.267165", + "step": 873, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:37:03.325015", + "step": 873, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02778015471994877, + "timestamp": "2025-09-10 02:37:03.335485", + "step": 874, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:03.390570", + "step": 874, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012220826931297779, + "timestamp": "2025-09-10 02:37:03.392849", + "step": 875, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:03.446034", + "step": 875, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014624446630477905, + "timestamp": "2025-09-10 02:37:03.452251", + "step": 876, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:03.503993", + "step": 876, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0029194417875260115, + "timestamp": "2025-09-10 02:37:03.506114", + "step": 877, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:03.558711", + "step": 877, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01652137003839016, + "timestamp": "2025-09-10 02:37:03.560798", + "step": 878, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:03.613556", + "step": 878, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010671539232134819, + "timestamp": "2025-09-10 02:37:03.620010", + "step": 879, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:03.672696", + "step": 879, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02381194569170475, + "timestamp": "2025-09-10 02:37:03.678330", + "step": 880, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:37:03.731137", + "step": 880, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022047102451324463, + "timestamp": "2025-09-10 02:37:03.741622", + "step": 881, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:03.794138", + "step": 881, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015093098394572735, + "timestamp": "2025-09-10 02:37:03.802377", + "step": 882, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:37:20.651392", + "step": 882, + "epoch": 1 + }, + { + "type": "pplx", + "content": 20869469.391494222, + "timestamp": "2025-09-10 02:37:20.654413", + "step": 882, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:20.714133", + "step": 882, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019753316417336464, + "timestamp": "2025-09-10 02:37:20.717175", + "step": 883, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:20.772666", + "step": 883, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010014870204031467, + "timestamp": "2025-09-10 02:37:20.779348", + "step": 884, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:20.833800", + "step": 884, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0305818822234869, + "timestamp": "2025-09-10 02:37:20.839175", + "step": 885, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:20.892884", + "step": 885, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011537541635334492, + "timestamp": "2025-09-10 02:37:20.895402", + "step": 886, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 784 + ], + "flops": 15680095254592.0 + }, + "timestamp": "2025-09-10 02:37:21.015383", + "step": 886, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028522804379463196, + "timestamp": "2025-09-10 02:37:21.037537", + "step": 887, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:37:21.094957", + "step": 887, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023994384333491325, + "timestamp": "2025-09-10 02:37:21.105517", + "step": 888, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:21.158292", + "step": 888, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014724929817020893, + "timestamp": "2025-09-10 02:37:21.162711", + "step": 889, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:21.215691", + "step": 889, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019524620845913887, + "timestamp": "2025-09-10 02:37:21.223557", + "step": 890, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:21.284604", + "step": 890, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00858976598829031, + "timestamp": "2025-09-10 02:37:21.290750", + "step": 891, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:37:21.359319", + "step": 891, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013474213890731335, + "timestamp": "2025-09-10 02:37:21.372772", + "step": 892, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:21.426134", + "step": 892, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04169805720448494, + "timestamp": "2025-09-10 02:37:21.428309", + "step": 893, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:21.489707", + "step": 893, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021386364474892616, + "timestamp": "2025-09-10 02:37:21.492533", + "step": 894, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:21.546895", + "step": 894, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0269964300096035, + "timestamp": "2025-09-10 02:37:21.549664", + "step": 895, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:21.603419", + "step": 895, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007117710076272488, + "timestamp": "2025-09-10 02:37:21.612022", + "step": 896, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:21.664638", + "step": 896, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034459445625543594, + "timestamp": "2025-09-10 02:37:21.667360", + "step": 897, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:21.727701", + "step": 897, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0017979221884161234, + "timestamp": "2025-09-10 02:37:21.730022", + "step": 898, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:21.782700", + "step": 898, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01059913169592619, + "timestamp": "2025-09-10 02:37:21.785460", + "step": 899, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:21.838584", + "step": 899, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02367735095322132, + "timestamp": "2025-09-10 02:37:21.844528", + "step": 900, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:21.896560", + "step": 900, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004368102643638849, + "timestamp": "2025-09-10 02:37:21.898575", + "step": 901, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:21.950851", + "step": 901, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015609510242938995, + "timestamp": "2025-09-10 02:37:21.953668", + "step": 902, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:22.009153", + "step": 902, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03799451142549515, + "timestamp": "2025-09-10 02:37:22.014516", + "step": 903, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:37:22.083055", + "step": 903, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0022819351870566607, + "timestamp": "2025-09-10 02:37:22.096019", + "step": 904, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:22.158777", + "step": 904, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003602777374908328, + "timestamp": "2025-09-10 02:37:22.164120", + "step": 905, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:22.217786", + "step": 905, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02644296921789646, + "timestamp": "2025-09-10 02:37:22.220013", + "step": 906, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 9920060287936.0 + }, + "timestamp": "2025-09-10 02:37:22.294376", + "step": 906, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030447587370872498, + "timestamp": "2025-09-10 02:37:22.308275", + "step": 907, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:37:22.369152", + "step": 907, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005110509227961302, + "timestamp": "2025-09-10 02:37:22.380693", + "step": 908, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:22.434012", + "step": 908, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019296277314424515, + "timestamp": "2025-09-10 02:37:22.436062", + "step": 909, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:22.489479", + "step": 909, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004453285131603479, + "timestamp": "2025-09-10 02:37:22.491523", + "step": 910, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:22.544779", + "step": 910, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0037371909711509943, + "timestamp": "2025-09-10 02:37:22.546869", + "step": 911, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:22.600931", + "step": 911, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01771688647568226, + "timestamp": "2025-09-10 02:37:22.607251", + "step": 912, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:22.661913", + "step": 912, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012563616037368774, + "timestamp": "2025-09-10 02:37:22.664087", + "step": 913, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:22.718792", + "step": 913, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011310129426419735, + "timestamp": "2025-09-10 02:37:22.720987", + "step": 914, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:22.774989", + "step": 914, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016410497948527336, + "timestamp": "2025-09-10 02:37:22.777121", + "step": 915, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:22.830237", + "step": 915, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006099455989897251, + "timestamp": "2025-09-10 02:37:22.836281", + "step": 916, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:22.888958", + "step": 916, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01592996157705784, + "timestamp": "2025-09-10 02:37:22.891053", + "step": 917, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:22.943212", + "step": 917, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01166507601737976, + "timestamp": "2025-09-10 02:37:22.946000", + "step": 918, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:22.999597", + "step": 918, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004178749863058329, + "timestamp": "2025-09-10 02:37:23.002301", + "step": 919, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:23.055315", + "step": 919, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0035687366034835577, + "timestamp": "2025-09-10 02:37:23.061499", + "step": 920, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:23.117005", + "step": 920, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0027293937746435404, + "timestamp": "2025-09-10 02:37:23.119196", + "step": 921, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:37:23.173803", + "step": 921, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002716184128075838, + "timestamp": "2025-09-10 02:37:23.183136", + "step": 922, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:23.238202", + "step": 922, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01797613315284252, + "timestamp": "2025-09-10 02:37:23.244612", + "step": 923, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:23.298621", + "step": 923, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013181501068174839, + "timestamp": "2025-09-10 02:37:23.304722", + "step": 924, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:23.371944", + "step": 924, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010413519106805325, + "timestamp": "2025-09-10 02:37:23.377827", + "step": 925, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:37:23.441420", + "step": 925, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00984834972769022, + "timestamp": "2025-09-10 02:37:23.454547", + "step": 926, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:23.523230", + "step": 926, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02834111824631691, + "timestamp": "2025-09-10 02:37:23.527975", + "step": 927, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:23.600815", + "step": 927, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005702574271708727, + "timestamp": "2025-09-10 02:37:23.611012", + "step": 928, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:23.669715", + "step": 928, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024012399837374687, + "timestamp": "2025-09-10 02:37:23.675668", + "step": 929, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:23.737279", + "step": 929, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019258147105574608, + "timestamp": "2025-09-10 02:37:23.743841", + "step": 930, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:23.815044", + "step": 930, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0035729738883674145, + "timestamp": "2025-09-10 02:37:23.824841", + "step": 931, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:23.881800", + "step": 931, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013548379763960838, + "timestamp": "2025-09-10 02:37:23.895440", + "step": 932, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:23.955157", + "step": 932, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021062707528471947, + "timestamp": "2025-09-10 02:37:23.962403", + "step": 933, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:24.017783", + "step": 933, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0214505884796381, + "timestamp": "2025-09-10 02:37:24.027741", + "step": 934, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:24.087759", + "step": 934, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020234962925314903, + "timestamp": "2025-09-10 02:37:24.091438", + "step": 935, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:24.154593", + "step": 935, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007375818677246571, + "timestamp": "2025-09-10 02:37:24.164577", + "step": 936, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:24.231528", + "step": 936, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03821520134806633, + "timestamp": "2025-09-10 02:37:24.239250", + "step": 937, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:24.308413", + "step": 937, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001402406021952629, + "timestamp": "2025-09-10 02:37:24.310546", + "step": 938, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:24.364572", + "step": 938, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036980025470256805, + "timestamp": "2025-09-10 02:37:24.366720", + "step": 939, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:24.419885", + "step": 939, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020188165828585625, + "timestamp": "2025-09-10 02:37:24.426174", + "step": 940, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:37:24.491994", + "step": 940, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015013186493888497, + "timestamp": "2025-09-10 02:37:24.505646", + "step": 941, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:24.559282", + "step": 941, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005488789640367031, + "timestamp": "2025-09-10 02:37:24.561430", + "step": 942, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:24.614292", + "step": 942, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015114936046302319, + "timestamp": "2025-09-10 02:37:24.617117", + "step": 943, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:24.670318", + "step": 943, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01635124906897545, + "timestamp": "2025-09-10 02:37:24.676386", + "step": 944, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:24.728570", + "step": 944, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02673029527068138, + "timestamp": "2025-09-10 02:37:24.730783", + "step": 945, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:24.783898", + "step": 945, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023836245760321617, + "timestamp": "2025-09-10 02:37:24.785892", + "step": 946, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:24.838782", + "step": 946, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012506467290222645, + "timestamp": "2025-09-10 02:37:24.840983", + "step": 947, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:24.893496", + "step": 947, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003875403432175517, + "timestamp": "2025-09-10 02:37:24.899656", + "step": 948, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:24.951520", + "step": 948, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017567379400134087, + "timestamp": "2025-09-10 02:37:24.954324", + "step": 949, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:25.006905", + "step": 949, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00482145743444562, + "timestamp": "2025-09-10 02:37:25.009063", + "step": 950, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:25.061710", + "step": 950, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02905195951461792, + "timestamp": "2025-09-10 02:37:25.064468", + "step": 951, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:25.117589", + "step": 951, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005482594482600689, + "timestamp": "2025-09-10 02:37:25.126403", + "step": 952, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:25.179035", + "step": 952, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01592409238219261, + "timestamp": "2025-09-10 02:37:25.181007", + "step": 953, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:25.233815", + "step": 953, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024157240986824036, + "timestamp": "2025-09-10 02:37:25.240279", + "step": 954, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:37:25.297972", + "step": 954, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021483780816197395, + "timestamp": "2025-09-10 02:37:25.308401", + "step": 955, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:25.361147", + "step": 955, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006510969717055559, + "timestamp": "2025-09-10 02:37:25.368372", + "step": 956, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:25.420754", + "step": 956, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030279411002993584, + "timestamp": "2025-09-10 02:37:25.422817", + "step": 957, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:25.475686", + "step": 957, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018104463815689087, + "timestamp": "2025-09-10 02:37:25.477827", + "step": 958, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:25.530938", + "step": 958, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05846955254673958, + "timestamp": "2025-09-10 02:37:25.533073", + "step": 959, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:25.585675", + "step": 959, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0069233691319823265, + "timestamp": "2025-09-10 02:37:25.591570", + "step": 960, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:25.644135", + "step": 960, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02537854015827179, + "timestamp": "2025-09-10 02:37:25.646168", + "step": 961, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:37:25.707005", + "step": 961, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004002484958618879, + "timestamp": "2025-09-10 02:37:25.718112", + "step": 962, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:25.772178", + "step": 962, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.058410581201314926, + "timestamp": "2025-09-10 02:37:25.781784", + "step": 963, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:37:25.842984", + "step": 963, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005029276013374329, + "timestamp": "2025-09-10 02:37:25.854695", + "step": 964, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:25.907547", + "step": 964, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018447445705533028, + "timestamp": "2025-09-10 02:37:25.909777", + "step": 965, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:25.963575", + "step": 965, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024119997397065163, + "timestamp": "2025-09-10 02:37:25.965886", + "step": 966, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:26.019108", + "step": 966, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0029526797588914633, + "timestamp": "2025-09-10 02:37:26.021461", + "step": 967, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:37:26.075812", + "step": 967, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011708670295774937, + "timestamp": "2025-09-10 02:37:26.086371", + "step": 968, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 576 + ], + "flops": 11520070000896.0 + }, + "timestamp": "2025-09-10 02:37:26.167437", + "step": 968, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010926509276032448, + "timestamp": "2025-09-10 02:37:26.184488", + "step": 969, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:26.237659", + "step": 969, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007356074173003435, + "timestamp": "2025-09-10 02:37:26.243695", + "step": 970, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:26.296984", + "step": 970, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009588902816176414, + "timestamp": "2025-09-10 02:37:26.299088", + "step": 971, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:26.352177", + "step": 971, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004068955313414335, + "timestamp": "2025-09-10 02:37:26.361729", + "step": 972, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:37:26.420236", + "step": 972, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01241745799779892, + "timestamp": "2025-09-10 02:37:26.431781", + "step": 973, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:37:26.484972", + "step": 973, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0047929794527590275, + "timestamp": "2025-09-10 02:37:26.486955", + "step": 974, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:26.539958", + "step": 974, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01755589060485363, + "timestamp": "2025-09-10 02:37:26.541950", + "step": 975, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:26.595109", + "step": 975, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034460101276636124, + "timestamp": "2025-09-10 02:37:26.601124", + "step": 976, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:26.653233", + "step": 976, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030974719673395157, + "timestamp": "2025-09-10 02:37:26.655258", + "step": 977, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:37:26.707649", + "step": 977, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016706557944417, + "timestamp": "2025-09-10 02:37:26.709553", + "step": 978, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:26.762118", + "step": 978, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011854681186378002, + "timestamp": "2025-09-10 02:37:26.764060", + "step": 979, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:26.817488", + "step": 979, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012618793174624443, + "timestamp": "2025-09-10 02:37:26.824345", + "step": 980, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:26.876718", + "step": 980, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009645439684391022, + "timestamp": "2025-09-10 02:37:26.878784", + "step": 981, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:26.931714", + "step": 981, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020316239446401596, + "timestamp": "2025-09-10 02:37:26.939923", + "step": 982, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:26.992894", + "step": 982, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01200488768517971, + "timestamp": "2025-09-10 02:37:26.994896", + "step": 983, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:27.046853", + "step": 983, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00738444784656167, + "timestamp": "2025-09-10 02:37:27.052549", + "step": 984, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:37:27.103948", + "step": 984, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006664204876869917, + "timestamp": "2025-09-10 02:37:27.106095", + "step": 985, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:27.174167", + "step": 985, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02430642768740654, + "timestamp": "2025-09-10 02:37:27.176463", + "step": 986, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:27.233614", + "step": 986, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020492246374487877, + "timestamp": "2025-09-10 02:37:27.238808", + "step": 987, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:37:27.297906", + "step": 987, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0061102090403437614, + "timestamp": "2025-09-10 02:37:27.309121", + "step": 988, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:27.360981", + "step": 988, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007920605130493641, + "timestamp": "2025-09-10 02:37:27.363154", + "step": 989, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:27.416351", + "step": 989, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03330426290631294, + "timestamp": "2025-09-10 02:37:27.425938", + "step": 990, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:27.478779", + "step": 990, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028603479266166687, + "timestamp": "2025-09-10 02:37:27.481062", + "step": 991, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:27.533716", + "step": 991, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015076139941811562, + "timestamp": "2025-09-10 02:37:27.539522", + "step": 992, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:27.591893", + "step": 992, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006608594208955765, + "timestamp": "2025-09-10 02:37:27.593844", + "step": 993, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:27.646415", + "step": 993, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.056671105325222015, + "timestamp": "2025-09-10 02:37:27.648443", + "step": 994, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:27.700978", + "step": 994, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005207512062042952, + "timestamp": "2025-09-10 02:37:27.703927", + "step": 995, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:27.756783", + "step": 995, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028034314513206482, + "timestamp": "2025-09-10 02:37:27.762490", + "step": 996, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:27.814890", + "step": 996, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012034551240503788, + "timestamp": "2025-09-10 02:37:27.825139", + "step": 997, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:27.878250", + "step": 997, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0023099612444639206, + "timestamp": "2025-09-10 02:37:27.880505", + "step": 998, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:27.933345", + "step": 998, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007459663785994053, + "timestamp": "2025-09-10 02:37:27.935742", + "step": 999, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:37:27.994504", + "step": 999, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02349942922592163, + "timestamp": "2025-09-10 02:37:28.005088", + "step": 1000, + "epoch": 1 + }, + { + "type": "info", + "content": "Checkpoint saved at step 1000", + "timestamp": "2025-09-10 02:37:28.489240", + "step": 1000, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:28.543458", + "step": 1000, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014669693075120449, + "timestamp": "2025-09-10 02:37:28.545761", + "step": 1001, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:28.598898", + "step": 1001, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028776686638593674, + "timestamp": "2025-09-10 02:37:28.608422", + "step": 1002, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:28.662115", + "step": 1002, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03464367985725403, + "timestamp": "2025-09-10 02:37:28.664138", + "step": 1003, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:28.716945", + "step": 1003, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024242648854851723, + "timestamp": "2025-09-10 02:37:28.724224", + "step": 1004, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:28.786540", + "step": 1004, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038966622669249773, + "timestamp": "2025-09-10 02:37:28.792975", + "step": 1005, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:28.845998", + "step": 1005, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01966005750000477, + "timestamp": "2025-09-10 02:37:28.848076", + "step": 1006, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:28.900502", + "step": 1006, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0049477508291602135, + "timestamp": "2025-09-10 02:37:28.902574", + "step": 1007, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:28.954704", + "step": 1007, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00921022891998291, + "timestamp": "2025-09-10 02:37:28.961234", + "step": 1008, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:29.016114", + "step": 1008, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02026495151221752, + "timestamp": "2025-09-10 02:37:29.028497", + "step": 1009, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:29.081111", + "step": 1009, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03712758794426918, + "timestamp": "2025-09-10 02:37:29.086151", + "step": 1010, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:29.141777", + "step": 1010, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012790190987288952, + "timestamp": "2025-09-10 02:37:29.148091", + "step": 1011, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:29.206205", + "step": 1011, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016143744811415672, + "timestamp": "2025-09-10 02:37:29.211943", + "step": 1012, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:29.268079", + "step": 1012, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012261556461453438, + "timestamp": "2025-09-10 02:37:29.271480", + "step": 1013, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:29.325039", + "step": 1013, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005705040879547596, + "timestamp": "2025-09-10 02:37:29.327039", + "step": 1014, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:29.381028", + "step": 1014, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009582250379025936, + "timestamp": "2025-09-10 02:37:29.389697", + "step": 1015, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:29.446588", + "step": 1015, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026290949434041977, + "timestamp": "2025-09-10 02:37:29.452465", + "step": 1016, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:29.504316", + "step": 1016, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01569192297756672, + "timestamp": "2025-09-10 02:37:29.509140", + "step": 1017, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:29.561622", + "step": 1017, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005800510756671429, + "timestamp": "2025-09-10 02:37:29.569727", + "step": 1018, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:37:29.637230", + "step": 1018, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02611362375319004, + "timestamp": "2025-09-10 02:37:29.649826", + "step": 1019, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:29.702667", + "step": 1019, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00921781174838543, + "timestamp": "2025-09-10 02:37:29.708460", + "step": 1020, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:37:29.761153", + "step": 1020, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028634995222091675, + "timestamp": "2025-09-10 02:37:29.763343", + "step": 1021, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:29.820325", + "step": 1021, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02150345966219902, + "timestamp": "2025-09-10 02:37:29.822418", + "step": 1022, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:37:29.876888", + "step": 1022, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025369632989168167, + "timestamp": "2025-09-10 02:37:29.886640", + "step": 1023, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:37:29.948450", + "step": 1023, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023534994572401047, + "timestamp": "2025-09-10 02:37:29.960345", + "step": 1024, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:37:30.026379", + "step": 1024, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012249263003468513, + "timestamp": "2025-09-10 02:37:30.040031", + "step": 1025, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:37:30.097662", + "step": 1025, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019058462232351303, + "timestamp": "2025-09-10 02:37:30.108096", + "step": 1026, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:30.164934", + "step": 1026, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007724892348051071, + "timestamp": "2025-09-10 02:37:30.174571", + "step": 1027, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:37:30.247665", + "step": 1027, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012001357972621918, + "timestamp": "2025-09-10 02:37:30.259580", + "step": 1028, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:30.312582", + "step": 1028, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005957506131380796, + "timestamp": "2025-09-10 02:37:30.320781", + "step": 1029, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:37:47.206926", + "step": 1029, + "epoch": 1 + }, + { + "type": "pplx", + "content": 24619440.02975512, + "timestamp": "2025-09-10 02:37:47.209600", + "step": 1029, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:47.263771", + "step": 1029, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02884192392230034, + "timestamp": "2025-09-10 02:37:47.267627", + "step": 1030, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:47.320940", + "step": 1030, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011000215075910091, + "timestamp": "2025-09-10 02:37:47.322776", + "step": 1031, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:47.378122", + "step": 1031, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011350167915225029, + "timestamp": "2025-09-10 02:37:47.384860", + "step": 1032, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:47.444205", + "step": 1032, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01561619620770216, + "timestamp": "2025-09-10 02:37:47.446115", + "step": 1033, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:47.498539", + "step": 1033, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01655939407646656, + "timestamp": "2025-09-10 02:37:47.513302", + "step": 1034, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:47.577064", + "step": 1034, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005556761287152767, + "timestamp": "2025-09-10 02:37:47.580405", + "step": 1035, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:47.636782", + "step": 1035, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016216743737459183, + "timestamp": "2025-09-10 02:37:47.645712", + "step": 1036, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:37:47.700483", + "step": 1036, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034411948174238205, + "timestamp": "2025-09-10 02:37:47.711033", + "step": 1037, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:47.763991", + "step": 1037, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006037055980414152, + "timestamp": "2025-09-10 02:37:47.766135", + "step": 1038, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:47.818945", + "step": 1038, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005499404389411211, + "timestamp": "2025-09-10 02:37:47.820973", + "step": 1039, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:47.874955", + "step": 1039, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022077979519963264, + "timestamp": "2025-09-10 02:37:47.885358", + "step": 1040, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:37:47.945031", + "step": 1040, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008110353723168373, + "timestamp": "2025-09-10 02:37:47.957041", + "step": 1041, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:48.019306", + "step": 1041, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024108950048685074, + "timestamp": "2025-09-10 02:37:48.025951", + "step": 1042, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:48.078876", + "step": 1042, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027277912944555283, + "timestamp": "2025-09-10 02:37:48.080847", + "step": 1043, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:48.133481", + "step": 1043, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009014950133860111, + "timestamp": "2025-09-10 02:37:48.139031", + "step": 1044, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:48.190974", + "step": 1044, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009914405643939972, + "timestamp": "2025-09-10 02:37:48.201216", + "step": 1045, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:37:48.269417", + "step": 1045, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006300345994532108, + "timestamp": "2025-09-10 02:37:48.282143", + "step": 1046, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:48.334989", + "step": 1046, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00424076896160841, + "timestamp": "2025-09-10 02:37:48.337099", + "step": 1047, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:48.389580", + "step": 1047, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034432608634233475, + "timestamp": "2025-09-10 02:37:48.395326", + "step": 1048, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:48.446945", + "step": 1048, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015104919672012329, + "timestamp": "2025-09-10 02:37:48.457232", + "step": 1049, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:48.511389", + "step": 1049, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025710944086313248, + "timestamp": "2025-09-10 02:37:48.513353", + "step": 1050, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:48.565779", + "step": 1050, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021173741668462753, + "timestamp": "2025-09-10 02:37:48.572349", + "step": 1051, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:37:48.630570", + "step": 1051, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013069218955934048, + "timestamp": "2025-09-10 02:37:48.641751", + "step": 1052, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:48.694417", + "step": 1052, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01654890365898609, + "timestamp": "2025-09-10 02:37:48.696395", + "step": 1053, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:37:48.753911", + "step": 1053, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01027678046375513, + "timestamp": "2025-09-10 02:37:48.764329", + "step": 1054, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:48.816780", + "step": 1054, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019616004079580307, + "timestamp": "2025-09-10 02:37:48.823174", + "step": 1055, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:48.875669", + "step": 1055, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01368601806461811, + "timestamp": "2025-09-10 02:37:48.881479", + "step": 1056, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:48.932942", + "step": 1056, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022524571046233177, + "timestamp": "2025-09-10 02:37:48.935988", + "step": 1057, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:48.988151", + "step": 1057, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005104460753500462, + "timestamp": "2025-09-10 02:37:48.991258", + "step": 1058, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:49.046483", + "step": 1058, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01909588649868965, + "timestamp": "2025-09-10 02:37:49.049648", + "step": 1059, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:49.101786", + "step": 1059, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008402914740145206, + "timestamp": "2025-09-10 02:37:49.107288", + "step": 1060, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:49.158871", + "step": 1060, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0161873959004879, + "timestamp": "2025-09-10 02:37:49.160959", + "step": 1061, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:37:49.220713", + "step": 1061, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007617958355695009, + "timestamp": "2025-09-10 02:37:49.231419", + "step": 1062, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:49.284675", + "step": 1062, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02958431839942932, + "timestamp": "2025-09-10 02:37:49.291383", + "step": 1063, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:37:49.364424", + "step": 1063, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022033551707863808, + "timestamp": "2025-09-10 02:37:49.378901", + "step": 1064, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:49.431331", + "step": 1064, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02828170545399189, + "timestamp": "2025-09-10 02:37:49.433325", + "step": 1065, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:49.486112", + "step": 1065, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01017991453409195, + "timestamp": "2025-09-10 02:37:49.488063", + "step": 1066, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:49.540450", + "step": 1066, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017581427469849586, + "timestamp": "2025-09-10 02:37:49.542388", + "step": 1067, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:49.594943", + "step": 1067, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006788523402065039, + "timestamp": "2025-09-10 02:37:49.602335", + "step": 1068, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:49.654227", + "step": 1068, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011207656934857368, + "timestamp": "2025-09-10 02:37:49.656280", + "step": 1069, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:49.708439", + "step": 1069, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005644269287586212, + "timestamp": "2025-09-10 02:37:49.710656", + "step": 1070, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:49.762784", + "step": 1070, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03670249506831169, + "timestamp": "2025-09-10 02:37:49.769621", + "step": 1071, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:49.821936", + "step": 1071, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014227320440113544, + "timestamp": "2025-09-10 02:37:49.827555", + "step": 1072, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:49.879455", + "step": 1072, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007771211676299572, + "timestamp": "2025-09-10 02:37:49.882627", + "step": 1073, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:49.934660", + "step": 1073, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008874562568962574, + "timestamp": "2025-09-10 02:37:49.936849", + "step": 1074, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:49.989454", + "step": 1074, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029543662443757057, + "timestamp": "2025-09-10 02:37:49.991593", + "step": 1075, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:50.043818", + "step": 1075, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004253097344189882, + "timestamp": "2025-09-10 02:37:50.049428", + "step": 1076, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:50.101070", + "step": 1076, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005809030961245298, + "timestamp": "2025-09-10 02:37:50.102905", + "step": 1077, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:50.154900", + "step": 1077, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010655926540493965, + "timestamp": "2025-09-10 02:37:50.157170", + "step": 1078, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:50.209772", + "step": 1078, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009624289348721504, + "timestamp": "2025-09-10 02:37:50.212717", + "step": 1079, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:50.266677", + "step": 1079, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006700613535940647, + "timestamp": "2025-09-10 02:37:50.272389", + "step": 1080, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:50.326123", + "step": 1080, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005288519896566868, + "timestamp": "2025-09-10 02:37:50.329455", + "step": 1081, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:37:50.390635", + "step": 1081, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0022787742782384157, + "timestamp": "2025-09-10 02:37:50.401563", + "step": 1082, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:50.462272", + "step": 1082, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019391821697354317, + "timestamp": "2025-09-10 02:37:50.465190", + "step": 1083, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:50.519309", + "step": 1083, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010357503779232502, + "timestamp": "2025-09-10 02:37:50.525094", + "step": 1084, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:50.577254", + "step": 1084, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02498718723654747, + "timestamp": "2025-09-10 02:37:50.585520", + "step": 1085, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:50.637985", + "step": 1085, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018656384199857712, + "timestamp": "2025-09-10 02:37:50.639966", + "step": 1086, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:50.692361", + "step": 1086, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009002963081002235, + "timestamp": "2025-09-10 02:37:50.700435", + "step": 1087, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:50.753069", + "step": 1087, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015503662638366222, + "timestamp": "2025-09-10 02:37:50.760444", + "step": 1088, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:50.817035", + "step": 1088, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015446570701897144, + "timestamp": "2025-09-10 02:37:50.818930", + "step": 1089, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:50.871366", + "step": 1089, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009854676201939583, + "timestamp": "2025-09-10 02:37:50.879581", + "step": 1090, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:50.932492", + "step": 1090, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00652840081602335, + "timestamp": "2025-09-10 02:37:50.934549", + "step": 1091, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:50.987250", + "step": 1091, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008367020636796951, + "timestamp": "2025-09-10 02:37:50.996328", + "step": 1092, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:51.051122", + "step": 1092, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006936206948012114, + "timestamp": "2025-09-10 02:37:51.061375", + "step": 1093, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:51.114312", + "step": 1093, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02077474072575569, + "timestamp": "2025-09-10 02:37:51.116238", + "step": 1094, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:51.168891", + "step": 1094, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01412847638130188, + "timestamp": "2025-09-10 02:37:51.175461", + "step": 1095, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:51.228332", + "step": 1095, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014179607853293419, + "timestamp": "2025-09-10 02:37:51.234178", + "step": 1096, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:51.286239", + "step": 1096, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0063507407903671265, + "timestamp": "2025-09-10 02:37:51.288454", + "step": 1097, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:37:51.342843", + "step": 1097, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007183849345892668, + "timestamp": "2025-09-10 02:37:51.352606", + "step": 1098, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:37:51.419043", + "step": 1098, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010225744917988777, + "timestamp": "2025-09-10 02:37:51.431290", + "step": 1099, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:51.484416", + "step": 1099, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0024694055318832397, + "timestamp": "2025-09-10 02:37:51.490117", + "step": 1100, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:51.542036", + "step": 1100, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008561083115637302, + "timestamp": "2025-09-10 02:37:51.545283", + "step": 1101, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:51.597857", + "step": 1101, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04253784194588661, + "timestamp": "2025-09-10 02:37:51.599694", + "step": 1102, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:37:51.656904", + "step": 1102, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004142506048083305, + "timestamp": "2025-09-10 02:37:51.667321", + "step": 1103, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:51.719576", + "step": 1103, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019315490499138832, + "timestamp": "2025-09-10 02:37:51.727041", + "step": 1104, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:51.779109", + "step": 1104, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0027652818243950605, + "timestamp": "2025-09-10 02:37:51.781288", + "step": 1105, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:51.833789", + "step": 1105, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008513693697750568, + "timestamp": "2025-09-10 02:37:51.835775", + "step": 1106, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:37:51.895316", + "step": 1106, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022946733981370926, + "timestamp": "2025-09-10 02:37:51.906008", + "step": 1107, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:51.958952", + "step": 1107, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003004850819706917, + "timestamp": "2025-09-10 02:37:51.969283", + "step": 1108, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:52.020963", + "step": 1108, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02548404224216938, + "timestamp": "2025-09-10 02:37:52.022825", + "step": 1109, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:52.075308", + "step": 1109, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024745497852563858, + "timestamp": "2025-09-10 02:37:52.078141", + "step": 1110, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:52.130697", + "step": 1110, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003216462442651391, + "timestamp": "2025-09-10 02:37:52.132455", + "step": 1111, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:52.185526", + "step": 1111, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011288267560303211, + "timestamp": "2025-09-10 02:37:52.191281", + "step": 1112, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:52.243628", + "step": 1112, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003838461125269532, + "timestamp": "2025-09-10 02:37:52.245889", + "step": 1113, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:52.298293", + "step": 1113, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007491410709917545, + "timestamp": "2025-09-10 02:37:52.300279", + "step": 1114, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:52.352778", + "step": 1114, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025626661255955696, + "timestamp": "2025-09-10 02:37:52.354774", + "step": 1115, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:52.407351", + "step": 1115, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005912961903959513, + "timestamp": "2025-09-10 02:37:52.412799", + "step": 1116, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:52.465936", + "step": 1116, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009867900051176548, + "timestamp": "2025-09-10 02:37:52.467780", + "step": 1117, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:52.519824", + "step": 1117, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039868734776973724, + "timestamp": "2025-09-10 02:37:52.521873", + "step": 1118, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:52.573781", + "step": 1118, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013871312141418457, + "timestamp": "2025-09-10 02:37:52.575773", + "step": 1119, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:52.627678", + "step": 1119, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017347069457173347, + "timestamp": "2025-09-10 02:37:52.633198", + "step": 1120, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:52.684659", + "step": 1120, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002342894207686186, + "timestamp": "2025-09-10 02:37:52.686663", + "step": 1121, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:52.739100", + "step": 1121, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007890790700912476, + "timestamp": "2025-09-10 02:37:52.741356", + "step": 1122, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:52.793966", + "step": 1122, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009587155655026436, + "timestamp": "2025-09-10 02:37:52.796944", + "step": 1123, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:52.849597", + "step": 1123, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0037315296940505505, + "timestamp": "2025-09-10 02:37:52.854992", + "step": 1124, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:37:52.911968", + "step": 1124, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019574126228690147, + "timestamp": "2025-09-10 02:37:52.923159", + "step": 1125, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:52.976586", + "step": 1125, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017041923478245735, + "timestamp": "2025-09-10 02:37:52.978758", + "step": 1126, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:53.031337", + "step": 1126, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02936060167849064, + "timestamp": "2025-09-10 02:37:53.033775", + "step": 1127, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:53.086188", + "step": 1127, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0074751111678779125, + "timestamp": "2025-09-10 02:37:53.091927", + "step": 1128, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:37:53.148010", + "step": 1128, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0022677951492369175, + "timestamp": "2025-09-10 02:37:53.159211", + "step": 1129, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:53.212112", + "step": 1129, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0010953686432912946, + "timestamp": "2025-09-10 02:37:53.213824", + "step": 1130, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:53.265732", + "step": 1130, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006479484494775534, + "timestamp": "2025-09-10 02:37:53.272168", + "step": 1131, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:53.324691", + "step": 1131, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005161761771887541, + "timestamp": "2025-09-10 02:37:53.330197", + "step": 1132, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:53.382178", + "step": 1132, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014082864858210087, + "timestamp": "2025-09-10 02:37:53.384157", + "step": 1133, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:53.436458", + "step": 1133, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005313499365001917, + "timestamp": "2025-09-10 02:37:53.438540", + "step": 1134, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:53.491145", + "step": 1134, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019438592717051506, + "timestamp": "2025-09-10 02:37:53.493156", + "step": 1135, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:53.545747", + "step": 1135, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0026171307545155287, + "timestamp": "2025-09-10 02:37:53.551366", + "step": 1136, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:37:53.617146", + "step": 1136, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014032737351953983, + "timestamp": "2025-09-10 02:37:53.628402", + "step": 1137, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:53.681665", + "step": 1137, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010052693076431751, + "timestamp": "2025-09-10 02:37:53.683698", + "step": 1138, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:37:53.742899", + "step": 1138, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001705739414319396, + "timestamp": "2025-09-10 02:37:53.753723", + "step": 1139, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:53.806088", + "step": 1139, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018248632550239563, + "timestamp": "2025-09-10 02:37:53.811508", + "step": 1140, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:53.863735", + "step": 1140, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002061953069642186, + "timestamp": "2025-09-10 02:37:53.866687", + "step": 1141, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:53.919446", + "step": 1141, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005538515746593475, + "timestamp": "2025-09-10 02:37:53.927691", + "step": 1142, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:53.980148", + "step": 1142, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036665797233581543, + "timestamp": "2025-09-10 02:37:53.982842", + "step": 1143, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:54.034929", + "step": 1143, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008468905463814735, + "timestamp": "2025-09-10 02:37:54.040485", + "step": 1144, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:54.092161", + "step": 1144, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009219801053404808, + "timestamp": "2025-09-10 02:37:54.094586", + "step": 1145, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:37:54.146885", + "step": 1145, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0010887905955314636, + "timestamp": "2025-09-10 02:37:54.149390", + "step": 1146, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:54.201551", + "step": 1146, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0035215781535953283, + "timestamp": "2025-09-10 02:37:54.203665", + "step": 1147, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:54.256210", + "step": 1147, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010627214796841145, + "timestamp": "2025-09-10 02:37:54.261895", + "step": 1148, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:54.313808", + "step": 1148, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033319178968667984, + "timestamp": "2025-09-10 02:37:54.316698", + "step": 1149, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:54.369947", + "step": 1149, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001132026081904769, + "timestamp": "2025-09-10 02:37:54.373187", + "step": 1150, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:54.426003", + "step": 1150, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013359389267861843, + "timestamp": "2025-09-10 02:37:54.427894", + "step": 1151, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:54.480914", + "step": 1151, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013103527016937733, + "timestamp": "2025-09-10 02:37:54.491249", + "step": 1152, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:54.543087", + "step": 1152, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00590308103710413, + "timestamp": "2025-09-10 02:37:54.544869", + "step": 1153, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:54.597236", + "step": 1153, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006476046051830053, + "timestamp": "2025-09-10 02:37:54.599275", + "step": 1154, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:54.651975", + "step": 1154, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00043728688615374267, + "timestamp": "2025-09-10 02:37:54.658610", + "step": 1155, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:54.711023", + "step": 1155, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024524131789803505, + "timestamp": "2025-09-10 02:37:54.717060", + "step": 1156, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:54.769756", + "step": 1156, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03014794923365116, + "timestamp": "2025-09-10 02:37:54.772194", + "step": 1157, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:37:54.825546", + "step": 1157, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007397075649350882, + "timestamp": "2025-09-10 02:37:54.827584", + "step": 1158, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:54.880182", + "step": 1158, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019256308674812317, + "timestamp": "2025-09-10 02:37:54.882531", + "step": 1159, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:37:54.935246", + "step": 1159, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003273534355685115, + "timestamp": "2025-09-10 02:37:54.940675", + "step": 1160, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:54.993288", + "step": 1160, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006755192764103413, + "timestamp": "2025-09-10 02:37:54.999645", + "step": 1161, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:55.052201", + "step": 1161, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003680414753034711, + "timestamp": "2025-09-10 02:37:55.055821", + "step": 1162, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:55.111138", + "step": 1162, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012628121301531792, + "timestamp": "2025-09-10 02:37:55.114127", + "step": 1163, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:55.167633", + "step": 1163, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012532063759863377, + "timestamp": "2025-09-10 02:37:55.174039", + "step": 1164, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:55.230030", + "step": 1164, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027490537613630295, + "timestamp": "2025-09-10 02:37:55.233888", + "step": 1165, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:55.288580", + "step": 1165, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0025479462929069996, + "timestamp": "2025-09-10 02:37:55.294543", + "step": 1166, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:37:55.349019", + "step": 1166, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005940371658653021, + "timestamp": "2025-09-10 02:37:55.354346", + "step": 1167, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:37:55.410499", + "step": 1167, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01090217474848032, + "timestamp": "2025-09-10 02:37:55.417464", + "step": 1168, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:37:55.486228", + "step": 1168, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05105715990066528, + "timestamp": "2025-09-10 02:37:55.499867", + "step": 1169, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:37:55.563571", + "step": 1169, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04473403841257095, + "timestamp": "2025-09-10 02:37:55.566427", + "step": 1170, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:37:55.633662", + "step": 1170, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012067670468240976, + "timestamp": "2025-09-10 02:37:55.636196", + "step": 1171, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:37:55.690215", + "step": 1171, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002647282090038061, + "timestamp": "2025-09-10 02:37:55.700623", + "step": 1172, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:55.754154", + "step": 1172, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006060405168682337, + "timestamp": "2025-09-10 02:37:55.762251", + "step": 1173, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:37:55.816354", + "step": 1173, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010575481690466404, + "timestamp": "2025-09-10 02:37:55.818535", + "step": 1174, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:37:55.910600", + "step": 1174, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04444717988371849, + "timestamp": "2025-09-10 02:37:55.923274", + "step": 1175, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:37:55.977409", + "step": 1175, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0008260281756520271, + "timestamp": "2025-09-10 02:37:55.986267", + "step": 1176, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:38:12.898862", + "step": 1176, + "epoch": 1 + }, + { + "type": "pplx", + "content": 27345006.897273418, + "timestamp": "2025-09-10 02:38:12.901641", + "step": 1176, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:12.958740", + "step": 1176, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022977551445364952, + "timestamp": "2025-09-10 02:38:12.966266", + "step": 1177, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:13.020467", + "step": 1177, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029089193791151047, + "timestamp": "2025-09-10 02:38:13.022248", + "step": 1178, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:13.076233", + "step": 1178, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003365251934155822, + "timestamp": "2025-09-10 02:38:13.078324", + "step": 1179, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:38:13.131194", + "step": 1179, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026831891387701035, + "timestamp": "2025-09-10 02:38:13.137427", + "step": 1180, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:13.189921", + "step": 1180, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015423259697854519, + "timestamp": "2025-09-10 02:38:13.191795", + "step": 1181, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:38:13.267949", + "step": 1181, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002096066251397133, + "timestamp": "2025-09-10 02:38:13.282038", + "step": 1182, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:13.336140", + "step": 1182, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015754317864775658, + "timestamp": "2025-09-10 02:38:13.344080", + "step": 1183, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:13.397561", + "step": 1183, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015605379594489932, + "timestamp": "2025-09-10 02:38:13.403909", + "step": 1184, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:13.457070", + "step": 1184, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04798185080289841, + "timestamp": "2025-09-10 02:38:13.459423", + "step": 1185, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:13.512142", + "step": 1185, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027037236839532852, + "timestamp": "2025-09-10 02:38:13.515428", + "step": 1186, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:13.569324", + "step": 1186, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00343741150572896, + "timestamp": "2025-09-10 02:38:13.576677", + "step": 1187, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:38:13.630314", + "step": 1187, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009087854065001011, + "timestamp": "2025-09-10 02:38:13.636269", + "step": 1188, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:13.688489", + "step": 1188, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019215011969208717, + "timestamp": "2025-09-10 02:38:13.690447", + "step": 1189, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:13.743298", + "step": 1189, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033194538205862045, + "timestamp": "2025-09-10 02:38:13.745396", + "step": 1190, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:13.799027", + "step": 1190, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019573742523789406, + "timestamp": "2025-09-10 02:38:13.808637", + "step": 1191, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:13.861110", + "step": 1191, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032782748341560364, + "timestamp": "2025-09-10 02:38:13.866709", + "step": 1192, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:38:13.923187", + "step": 1192, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006269785109907389, + "timestamp": "2025-09-10 02:38:13.934428", + "step": 1193, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:13.987892", + "step": 1193, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01658725179731846, + "timestamp": "2025-09-10 02:38:13.990860", + "step": 1194, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 624 + ], + "flops": 12480075828672.0 + }, + "timestamp": "2025-09-10 02:38:14.083622", + "step": 1194, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017288723960518837, + "timestamp": "2025-09-10 02:38:14.100964", + "step": 1195, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:14.154133", + "step": 1195, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010024419054389, + "timestamp": "2025-09-10 02:38:14.162753", + "step": 1196, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:14.215042", + "step": 1196, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007814670912921429, + "timestamp": "2025-09-10 02:38:14.217997", + "step": 1197, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:38:14.284892", + "step": 1197, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012869198806583881, + "timestamp": "2025-09-10 02:38:14.297123", + "step": 1198, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:14.350960", + "step": 1198, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0037422089371830225, + "timestamp": "2025-09-10 02:38:14.353152", + "step": 1199, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:14.406833", + "step": 1199, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030634628608822823, + "timestamp": "2025-09-10 02:38:14.412835", + "step": 1200, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:38:14.469790", + "step": 1200, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012157849036157131, + "timestamp": "2025-09-10 02:38:14.481032", + "step": 1201, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:38:14.553745", + "step": 1201, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041633132845163345, + "timestamp": "2025-09-10 02:38:14.567215", + "step": 1202, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:38:14.629164", + "step": 1202, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014732087729498744, + "timestamp": "2025-09-10 02:38:14.640263", + "step": 1203, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:14.693337", + "step": 1203, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01730882003903389, + "timestamp": "2025-09-10 02:38:14.699167", + "step": 1204, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:14.751287", + "step": 1204, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0406925305724144, + "timestamp": "2025-09-10 02:38:14.761452", + "step": 1205, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:14.814922", + "step": 1205, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014714895747601986, + "timestamp": "2025-09-10 02:38:14.822949", + "step": 1206, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:14.875989", + "step": 1206, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017625965178012848, + "timestamp": "2025-09-10 02:38:14.884211", + "step": 1207, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:14.937946", + "step": 1207, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04992163926362991, + "timestamp": "2025-09-10 02:38:14.948374", + "step": 1208, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:15.000625", + "step": 1208, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007176562212407589, + "timestamp": "2025-09-10 02:38:15.002555", + "step": 1209, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:15.055144", + "step": 1209, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027468519285321236, + "timestamp": "2025-09-10 02:38:15.057325", + "step": 1210, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:15.110240", + "step": 1210, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01314002275466919, + "timestamp": "2025-09-10 02:38:15.116878", + "step": 1211, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:15.170150", + "step": 1211, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017846744507551193, + "timestamp": "2025-09-10 02:38:15.176310", + "step": 1212, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:15.232007", + "step": 1212, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009627328254282475, + "timestamp": "2025-09-10 02:38:15.234229", + "step": 1213, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:15.287080", + "step": 1213, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018968701362609863, + "timestamp": "2025-09-10 02:38:15.289244", + "step": 1214, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:15.342526", + "step": 1214, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010385209694504738, + "timestamp": "2025-09-10 02:38:15.348911", + "step": 1215, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:15.401961", + "step": 1215, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010980025865137577, + "timestamp": "2025-09-10 02:38:15.409216", + "step": 1216, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:15.461660", + "step": 1216, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02287864498794079, + "timestamp": "2025-09-10 02:38:15.463604", + "step": 1217, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:15.516676", + "step": 1217, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02321491949260235, + "timestamp": "2025-09-10 02:38:15.518873", + "step": 1218, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:15.571683", + "step": 1218, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012190199457108974, + "timestamp": "2025-09-10 02:38:15.573671", + "step": 1219, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:15.628782", + "step": 1219, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008502763696014881, + "timestamp": "2025-09-10 02:38:15.634777", + "step": 1220, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:38:15.687527", + "step": 1220, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021801332011818886, + "timestamp": "2025-09-10 02:38:15.689709", + "step": 1221, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:15.742823", + "step": 1221, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017310811206698418, + "timestamp": "2025-09-10 02:38:15.744838", + "step": 1222, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:15.798401", + "step": 1222, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021017830818891525, + "timestamp": "2025-09-10 02:38:15.800728", + "step": 1223, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:15.854630", + "step": 1223, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0442025251686573, + "timestamp": "2025-09-10 02:38:15.865029", + "step": 1224, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:15.919329", + "step": 1224, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012025970034301281, + "timestamp": "2025-09-10 02:38:15.921666", + "step": 1225, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:15.975498", + "step": 1225, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020059674978256226, + "timestamp": "2025-09-10 02:38:15.985114", + "step": 1226, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:16.038170", + "step": 1226, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03154158964753151, + "timestamp": "2025-09-10 02:38:16.040409", + "step": 1227, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:16.092998", + "step": 1227, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013741062954068184, + "timestamp": "2025-09-10 02:38:16.098747", + "step": 1228, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:16.150938", + "step": 1228, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021907124668359756, + "timestamp": "2025-09-10 02:38:16.157549", + "step": 1229, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:16.210417", + "step": 1229, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004827319644391537, + "timestamp": "2025-09-10 02:38:16.212438", + "step": 1230, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:16.265687", + "step": 1230, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008723611012101173, + "timestamp": "2025-09-10 02:38:16.267672", + "step": 1231, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:16.320927", + "step": 1231, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027946332469582558, + "timestamp": "2025-09-10 02:38:16.331306", + "step": 1232, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:16.386081", + "step": 1232, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011744270101189613, + "timestamp": "2025-09-10 02:38:16.388189", + "step": 1233, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:16.441696", + "step": 1233, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018970759585499763, + "timestamp": "2025-09-10 02:38:16.443831", + "step": 1234, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:16.497105", + "step": 1234, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03357694670557976, + "timestamp": "2025-09-10 02:38:16.499194", + "step": 1235, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:16.552121", + "step": 1235, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005652002990245819, + "timestamp": "2025-09-10 02:38:16.558251", + "step": 1236, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:16.610344", + "step": 1236, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021448343992233276, + "timestamp": "2025-09-10 02:38:16.616700", + "step": 1237, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:16.670336", + "step": 1237, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011806346476078033, + "timestamp": "2025-09-10 02:38:16.680007", + "step": 1238, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:16.733381", + "step": 1238, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013083149679005146, + "timestamp": "2025-09-10 02:38:16.741571", + "step": 1239, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:16.794628", + "step": 1239, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008364038541913033, + "timestamp": "2025-09-10 02:38:16.800826", + "step": 1240, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:16.853238", + "step": 1240, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01520631369203329, + "timestamp": "2025-09-10 02:38:16.856085", + "step": 1241, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:16.908824", + "step": 1241, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004735896829515696, + "timestamp": "2025-09-10 02:38:16.911035", + "step": 1242, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:16.964002", + "step": 1242, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010918344371020794, + "timestamp": "2025-09-10 02:38:16.966103", + "step": 1243, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:17.018589", + "step": 1243, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004988556262105703, + "timestamp": "2025-09-10 02:38:17.024569", + "step": 1244, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:17.077221", + "step": 1244, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040805794298648834, + "timestamp": "2025-09-10 02:38:17.079275", + "step": 1245, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:17.132795", + "step": 1245, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016436351463198662, + "timestamp": "2025-09-10 02:38:17.134984", + "step": 1246, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:17.188793", + "step": 1246, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022019585594534874, + "timestamp": "2025-09-10 02:38:17.191049", + "step": 1247, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:17.244231", + "step": 1247, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005552917718887329, + "timestamp": "2025-09-10 02:38:17.250126", + "step": 1248, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:17.302429", + "step": 1248, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040717605501413345, + "timestamp": "2025-09-10 02:38:17.304710", + "step": 1249, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:38:17.370782", + "step": 1249, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019663427025079727, + "timestamp": "2025-09-10 02:38:17.383025", + "step": 1250, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:17.435656", + "step": 1250, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010957092978060246, + "timestamp": "2025-09-10 02:38:17.442277", + "step": 1251, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:17.498542", + "step": 1251, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017680354416370392, + "timestamp": "2025-09-10 02:38:17.504496", + "step": 1252, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:17.557135", + "step": 1252, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012005984783172607, + "timestamp": "2025-09-10 02:38:17.567396", + "step": 1253, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:17.620900", + "step": 1253, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013249626383185387, + "timestamp": "2025-09-10 02:38:17.627212", + "step": 1254, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:17.680564", + "step": 1254, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002336797770112753, + "timestamp": "2025-09-10 02:38:17.682872", + "step": 1255, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:17.735397", + "step": 1255, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023026512935757637, + "timestamp": "2025-09-10 02:38:17.741284", + "step": 1256, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:17.794005", + "step": 1256, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007024406921118498, + "timestamp": "2025-09-10 02:38:17.797072", + "step": 1257, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:17.849531", + "step": 1257, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020878229290246964, + "timestamp": "2025-09-10 02:38:17.851785", + "step": 1258, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:17.904261", + "step": 1258, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03259377181529999, + "timestamp": "2025-09-10 02:38:17.906331", + "step": 1259, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:17.959703", + "step": 1259, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014760008081793785, + "timestamp": "2025-09-10 02:38:17.965495", + "step": 1260, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:38:18.022380", + "step": 1260, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022210806608200073, + "timestamp": "2025-09-10 02:38:18.033641", + "step": 1261, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:38:18.101455", + "step": 1261, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01268862932920456, + "timestamp": "2025-09-10 02:38:18.114046", + "step": 1262, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:18.167560", + "step": 1262, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0026451244484633207, + "timestamp": "2025-09-10 02:38:18.175841", + "step": 1263, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:18.228933", + "step": 1263, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01006376277655363, + "timestamp": "2025-09-10 02:38:18.234902", + "step": 1264, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:18.287342", + "step": 1264, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015301442705094814, + "timestamp": "2025-09-10 02:38:18.289703", + "step": 1265, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:18.342503", + "step": 1265, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02748100645840168, + "timestamp": "2025-09-10 02:38:18.350929", + "step": 1266, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:18.405414", + "step": 1266, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03254714235663414, + "timestamp": "2025-09-10 02:38:18.415261", + "step": 1267, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:18.467782", + "step": 1267, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005905542988330126, + "timestamp": "2025-09-10 02:38:18.474988", + "step": 1268, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:18.528224", + "step": 1268, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033585142344236374, + "timestamp": "2025-09-10 02:38:18.538678", + "step": 1269, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:18.592672", + "step": 1269, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012435145676136017, + "timestamp": "2025-09-10 02:38:18.594906", + "step": 1270, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:18.647357", + "step": 1270, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0311743151396513, + "timestamp": "2025-09-10 02:38:18.655566", + "step": 1271, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:38:18.731154", + "step": 1271, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006537002976983786, + "timestamp": "2025-09-10 02:38:18.746011", + "step": 1272, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:38:18.811001", + "step": 1272, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006863919552415609, + "timestamp": "2025-09-10 02:38:18.824219", + "step": 1273, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:18.877154", + "step": 1273, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0139911575242877, + "timestamp": "2025-09-10 02:38:18.879182", + "step": 1274, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:18.931952", + "step": 1274, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012018707348033786, + "timestamp": "2025-09-10 02:38:18.934151", + "step": 1275, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:18.986608", + "step": 1275, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01508967112749815, + "timestamp": "2025-09-10 02:38:18.992352", + "step": 1276, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:19.044139", + "step": 1276, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010803669691085815, + "timestamp": "2025-09-10 02:38:19.046210", + "step": 1277, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:19.098974", + "step": 1277, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025407282635569572, + "timestamp": "2025-09-10 02:38:19.107108", + "step": 1278, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:19.159842", + "step": 1278, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007086531259119511, + "timestamp": "2025-09-10 02:38:19.162068", + "step": 1279, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:19.214911", + "step": 1279, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01293234247714281, + "timestamp": "2025-09-10 02:38:19.220506", + "step": 1280, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:38:19.275396", + "step": 1280, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01469672191888094, + "timestamp": "2025-09-10 02:38:19.277522", + "step": 1281, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:19.330703", + "step": 1281, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014103470370173454, + "timestamp": "2025-09-10 02:38:19.337199", + "step": 1282, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:38:19.399760", + "step": 1282, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02563401870429516, + "timestamp": "2025-09-10 02:38:19.410697", + "step": 1283, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:19.464170", + "step": 1283, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006677473429590464, + "timestamp": "2025-09-10 02:38:19.469997", + "step": 1284, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:19.522340", + "step": 1284, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016647594049572945, + "timestamp": "2025-09-10 02:38:19.524453", + "step": 1285, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:19.578283", + "step": 1285, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0032691562082618475, + "timestamp": "2025-09-10 02:38:19.587921", + "step": 1286, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:19.641644", + "step": 1286, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004560043569654226, + "timestamp": "2025-09-10 02:38:19.651268", + "step": 1287, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:19.705837", + "step": 1287, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015763049945235252, + "timestamp": "2025-09-10 02:38:19.716468", + "step": 1288, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:38:19.773301", + "step": 1288, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010525521822273731, + "timestamp": "2025-09-10 02:38:19.784470", + "step": 1289, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:19.839252", + "step": 1289, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014197276905179024, + "timestamp": "2025-09-10 02:38:19.841538", + "step": 1290, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:19.895219", + "step": 1290, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002154273446649313, + "timestamp": "2025-09-10 02:38:19.897402", + "step": 1291, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:19.950254", + "step": 1291, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004334344062954187, + "timestamp": "2025-09-10 02:38:19.955868", + "step": 1292, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:20.008145", + "step": 1292, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01705791987478733, + "timestamp": "2025-09-10 02:38:20.014902", + "step": 1293, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:20.067808", + "step": 1293, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0029170287307351828, + "timestamp": "2025-09-10 02:38:20.076005", + "step": 1294, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:38:20.128619", + "step": 1294, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007720049936324358, + "timestamp": "2025-09-10 02:38:20.130868", + "step": 1295, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:20.183277", + "step": 1295, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043088365346193314, + "timestamp": "2025-09-10 02:38:20.188831", + "step": 1296, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:38:20.248234", + "step": 1296, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02546166256070137, + "timestamp": "2025-09-10 02:38:20.260041", + "step": 1297, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:20.312790", + "step": 1297, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020500587299466133, + "timestamp": "2025-09-10 02:38:20.321233", + "step": 1298, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:20.377055", + "step": 1298, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013855491764843464, + "timestamp": "2025-09-10 02:38:20.381413", + "step": 1299, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:20.441191", + "step": 1299, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01237348560243845, + "timestamp": "2025-09-10 02:38:20.446852", + "step": 1300, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:20.498939", + "step": 1300, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03956538066267967, + "timestamp": "2025-09-10 02:38:20.509244", + "step": 1301, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:38:20.581928", + "step": 1301, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0031188693828880787, + "timestamp": "2025-09-10 02:38:20.595388", + "step": 1302, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:20.648410", + "step": 1302, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01015661470592022, + "timestamp": "2025-09-10 02:38:20.650492", + "step": 1303, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:20.709248", + "step": 1303, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0020863953977823257, + "timestamp": "2025-09-10 02:38:20.715124", + "step": 1304, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:38:20.781036", + "step": 1304, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008217426016926765, + "timestamp": "2025-09-10 02:38:20.794757", + "step": 1305, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:38:20.853465", + "step": 1305, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015589231625199318, + "timestamp": "2025-09-10 02:38:20.855437", + "step": 1306, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:20.908389", + "step": 1306, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020488901063799858, + "timestamp": "2025-09-10 02:38:20.914876", + "step": 1307, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:38:20.975170", + "step": 1307, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024042991921305656, + "timestamp": "2025-09-10 02:38:20.986301", + "step": 1308, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:21.038706", + "step": 1308, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011314533650875092, + "timestamp": "2025-09-10 02:38:21.041683", + "step": 1309, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:21.096955", + "step": 1309, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0035076974891126156, + "timestamp": "2025-09-10 02:38:21.098895", + "step": 1310, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:21.151700", + "step": 1310, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025932367891073227, + "timestamp": "2025-09-10 02:38:21.154654", + "step": 1311, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:21.211634", + "step": 1311, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012541480362415314, + "timestamp": "2025-09-10 02:38:21.218881", + "step": 1312, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:21.271265", + "step": 1312, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006389283575117588, + "timestamp": "2025-09-10 02:38:21.279452", + "step": 1313, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:21.333281", + "step": 1313, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009228929877281189, + "timestamp": "2025-09-10 02:38:21.342924", + "step": 1314, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:21.397526", + "step": 1314, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029494483023881912, + "timestamp": "2025-09-10 02:38:21.399812", + "step": 1315, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:21.457729", + "step": 1315, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007591061759740114, + "timestamp": "2025-09-10 02:38:21.466724", + "step": 1316, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:38:21.531152", + "step": 1316, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004441217519342899, + "timestamp": "2025-09-10 02:38:21.542981", + "step": 1317, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:38:21.612270", + "step": 1317, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007398766931146383, + "timestamp": "2025-09-10 02:38:21.624951", + "step": 1318, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:21.678636", + "step": 1318, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026653682813048363, + "timestamp": "2025-09-10 02:38:21.680728", + "step": 1319, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:21.734595", + "step": 1319, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015224191593006253, + "timestamp": "2025-09-10 02:38:21.745139", + "step": 1320, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:21.808181", + "step": 1320, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0419372096657753, + "timestamp": "2025-09-10 02:38:21.810390", + "step": 1321, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:21.863532", + "step": 1321, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029287930577993393, + "timestamp": "2025-09-10 02:38:21.866645", + "step": 1322, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:21.921366", + "step": 1322, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010279752314090729, + "timestamp": "2025-09-10 02:38:21.931136", + "step": 1323, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:38:38.845893", + "step": 1323, + "epoch": 1 + }, + { + "type": "pplx", + "content": 21127182.231462687, + "timestamp": "2025-09-10 02:38:38.848489", + "step": 1323, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:38:38.905350", + "step": 1323, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011432381346821785, + "timestamp": "2025-09-10 02:38:38.916599", + "step": 1324, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:38.969748", + "step": 1324, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021527279168367386, + "timestamp": "2025-09-10 02:38:38.979248", + "step": 1325, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:39.032831", + "step": 1325, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005625096149742603, + "timestamp": "2025-09-10 02:38:39.040710", + "step": 1326, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:38:39.099669", + "step": 1326, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018424388021230698, + "timestamp": "2025-09-10 02:38:39.110124", + "step": 1327, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:39.163479", + "step": 1327, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010245404206216335, + "timestamp": "2025-09-10 02:38:39.169640", + "step": 1328, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:39.226121", + "step": 1328, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026787420734763145, + "timestamp": "2025-09-10 02:38:39.227988", + "step": 1329, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:39.282499", + "step": 1329, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0073931836523115635, + "timestamp": "2025-09-10 02:38:39.292309", + "step": 1330, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:38:39.353274", + "step": 1330, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02826576866209507, + "timestamp": "2025-09-10 02:38:39.363759", + "step": 1331, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:39.417634", + "step": 1331, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006794530898332596, + "timestamp": "2025-09-10 02:38:39.423989", + "step": 1332, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:39.478173", + "step": 1332, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026355987414717674, + "timestamp": "2025-09-10 02:38:39.488223", + "step": 1333, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:39.541890", + "step": 1333, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023742901161313057, + "timestamp": "2025-09-10 02:38:39.550036", + "step": 1334, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:38:39.610318", + "step": 1334, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025258231908082962, + "timestamp": "2025-09-10 02:38:39.621059", + "step": 1335, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:38:39.679163", + "step": 1335, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010027949698269367, + "timestamp": "2025-09-10 02:38:39.690379", + "step": 1336, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:39.743662", + "step": 1336, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0029624791350215673, + "timestamp": "2025-09-10 02:38:39.745724", + "step": 1337, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:39.805317", + "step": 1337, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006062061991542578, + "timestamp": "2025-09-10 02:38:39.807438", + "step": 1338, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:39.860548", + "step": 1338, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023339737206697464, + "timestamp": "2025-09-10 02:38:39.862604", + "step": 1339, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:39.916303", + "step": 1339, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011857804842293262, + "timestamp": "2025-09-10 02:38:39.926712", + "step": 1340, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:39.979518", + "step": 1340, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0049182153306901455, + "timestamp": "2025-09-10 02:38:39.986213", + "step": 1341, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:40.041688", + "step": 1341, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0034381321165710688, + "timestamp": "2025-09-10 02:38:40.048456", + "step": 1342, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:40.102040", + "step": 1342, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008707774803042412, + "timestamp": "2025-09-10 02:38:40.104026", + "step": 1343, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:40.157687", + "step": 1343, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00808065664023161, + "timestamp": "2025-09-10 02:38:40.168100", + "step": 1344, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:40.220159", + "step": 1344, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03678285330533981, + "timestamp": "2025-09-10 02:38:40.222078", + "step": 1345, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:40.274560", + "step": 1345, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008914128877222538, + "timestamp": "2025-09-10 02:38:40.283000", + "step": 1346, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:40.336229", + "step": 1346, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018418842228129506, + "timestamp": "2025-09-10 02:38:40.338165", + "step": 1347, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:40.391326", + "step": 1347, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011227915063500404, + "timestamp": "2025-09-10 02:38:40.396809", + "step": 1348, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:40.449601", + "step": 1348, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015851123025640845, + "timestamp": "2025-09-10 02:38:40.456051", + "step": 1349, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:40.509949", + "step": 1349, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006693967618048191, + "timestamp": "2025-09-10 02:38:40.511917", + "step": 1350, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:40.566108", + "step": 1350, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026232196018099785, + "timestamp": "2025-09-10 02:38:40.575906", + "step": 1351, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:38:40.637344", + "step": 1351, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006545294541865587, + "timestamp": "2025-09-10 02:38:40.649037", + "step": 1352, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:40.703453", + "step": 1352, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012144649401307106, + "timestamp": "2025-09-10 02:38:40.713975", + "step": 1353, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:40.767520", + "step": 1353, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011797365732491016, + "timestamp": "2025-09-10 02:38:40.773991", + "step": 1354, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:38:40.827381", + "step": 1354, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004145797807723284, + "timestamp": "2025-09-10 02:38:40.829317", + "step": 1355, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:40.882476", + "step": 1355, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.052191197872161865, + "timestamp": "2025-09-10 02:38:40.888491", + "step": 1356, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:38:40.947686", + "step": 1356, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026247352361679077, + "timestamp": "2025-09-10 02:38:40.959452", + "step": 1357, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:41.013898", + "step": 1357, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003235304495319724, + "timestamp": "2025-09-10 02:38:41.015819", + "step": 1358, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:41.070586", + "step": 1358, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010302268899977207, + "timestamp": "2025-09-10 02:38:41.080400", + "step": 1359, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:41.133891", + "step": 1359, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00330765126273036, + "timestamp": "2025-09-10 02:38:41.139543", + "step": 1360, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:41.192648", + "step": 1360, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008435637690126896, + "timestamp": "2025-09-10 02:38:41.198465", + "step": 1361, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:41.252867", + "step": 1361, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01547850389033556, + "timestamp": "2025-09-10 02:38:41.262650", + "step": 1362, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:38:41.325952", + "step": 1362, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031536929309368134, + "timestamp": "2025-09-10 02:38:41.336713", + "step": 1363, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:41.397479", + "step": 1363, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003552355570718646, + "timestamp": "2025-09-10 02:38:41.404390", + "step": 1364, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:38:41.475978", + "step": 1364, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002945947926491499, + "timestamp": "2025-09-10 02:38:41.487804", + "step": 1365, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:41.555636", + "step": 1365, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025460409000515938, + "timestamp": "2025-09-10 02:38:41.561901", + "step": 1366, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:41.617729", + "step": 1366, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04281119629740715, + "timestamp": "2025-09-10 02:38:41.620380", + "step": 1367, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:41.674635", + "step": 1367, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05582251399755478, + "timestamp": "2025-09-10 02:38:41.683307", + "step": 1368, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:41.739006", + "step": 1368, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013911671936511993, + "timestamp": "2025-09-10 02:38:41.741881", + "step": 1369, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:38:41.804215", + "step": 1369, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03550497815012932, + "timestamp": "2025-09-10 02:38:41.814425", + "step": 1370, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:41.883785", + "step": 1370, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022621911019086838, + "timestamp": "2025-09-10 02:38:41.889585", + "step": 1371, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:38:41.964496", + "step": 1371, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03409972041845322, + "timestamp": "2025-09-10 02:38:41.975760", + "step": 1372, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:42.031319", + "step": 1372, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010691115632653236, + "timestamp": "2025-09-10 02:38:42.038071", + "step": 1373, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:42.092892", + "step": 1373, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025426147505640984, + "timestamp": "2025-09-10 02:38:42.097110", + "step": 1374, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:42.153376", + "step": 1374, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013123149052262306, + "timestamp": "2025-09-10 02:38:42.156693", + "step": 1375, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:42.212824", + "step": 1375, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009240327402949333, + "timestamp": "2025-09-10 02:38:42.221340", + "step": 1376, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:38:42.276791", + "step": 1376, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005370268132537603, + "timestamp": "2025-09-10 02:38:42.279578", + "step": 1377, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:42.334280", + "step": 1377, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0016543889651075006, + "timestamp": "2025-09-10 02:38:42.340883", + "step": 1378, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:38:42.400070", + "step": 1378, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034547124058008194, + "timestamp": "2025-09-10 02:38:42.411788", + "step": 1379, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 624 + ], + "flops": 12480075828672.0 + }, + "timestamp": "2025-09-10 02:38:42.515223", + "step": 1379, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01084803231060505, + "timestamp": "2025-09-10 02:38:42.533321", + "step": 1380, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:42.592123", + "step": 1380, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008962323889136314, + "timestamp": "2025-09-10 02:38:42.598475", + "step": 1381, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:42.663984", + "step": 1381, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0072692567482590675, + "timestamp": "2025-09-10 02:38:42.667489", + "step": 1382, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:38:42.750598", + "step": 1382, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020620524883270264, + "timestamp": "2025-09-10 02:38:42.764289", + "step": 1383, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:42.823775", + "step": 1383, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019325902685523033, + "timestamp": "2025-09-10 02:38:42.834332", + "step": 1384, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:42.889555", + "step": 1384, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010693066753447056, + "timestamp": "2025-09-10 02:38:42.891779", + "step": 1385, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:42.957093", + "step": 1385, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01635764166712761, + "timestamp": "2025-09-10 02:38:42.960885", + "step": 1386, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:43.024361", + "step": 1386, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019610950723290443, + "timestamp": "2025-09-10 02:38:43.031730", + "step": 1387, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:43.087319", + "step": 1387, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021304180845618248, + "timestamp": "2025-09-10 02:38:43.093205", + "step": 1388, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:43.148937", + "step": 1388, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022610317915678024, + "timestamp": "2025-09-10 02:38:43.150982", + "step": 1389, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:43.204180", + "step": 1389, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009096337482333183, + "timestamp": "2025-09-10 02:38:43.206113", + "step": 1390, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:43.259266", + "step": 1390, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006592115852981806, + "timestamp": "2025-09-10 02:38:43.261515", + "step": 1391, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:38:43.314969", + "step": 1391, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01335006020963192, + "timestamp": "2025-09-10 02:38:43.320807", + "step": 1392, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:43.373808", + "step": 1392, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019167417660355568, + "timestamp": "2025-09-10 02:38:43.375989", + "step": 1393, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:43.430965", + "step": 1393, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015288623049855232, + "timestamp": "2025-09-10 02:38:43.440701", + "step": 1394, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:38:43.509661", + "step": 1394, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005163759924471378, + "timestamp": "2025-09-10 02:38:43.522148", + "step": 1395, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:43.576547", + "step": 1395, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010330254212021828, + "timestamp": "2025-09-10 02:38:43.583066", + "step": 1396, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:43.635902", + "step": 1396, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011865033768117428, + "timestamp": "2025-09-10 02:38:43.638683", + "step": 1397, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:43.691774", + "step": 1397, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016444692388176918, + "timestamp": "2025-09-10 02:38:43.693981", + "step": 1398, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:43.746753", + "step": 1398, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005546797998249531, + "timestamp": "2025-09-10 02:38:43.748836", + "step": 1399, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:38:43.801957", + "step": 1399, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014152840711176395, + "timestamp": "2025-09-10 02:38:43.808037", + "step": 1400, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:43.860395", + "step": 1400, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00697662029415369, + "timestamp": "2025-09-10 02:38:43.862314", + "step": 1401, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:43.915506", + "step": 1401, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00883783120661974, + "timestamp": "2025-09-10 02:38:43.918007", + "step": 1402, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:43.970843", + "step": 1402, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0286136232316494, + "timestamp": "2025-09-10 02:38:43.977211", + "step": 1403, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:44.030119", + "step": 1403, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03485722467303276, + "timestamp": "2025-09-10 02:38:44.036031", + "step": 1404, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 12160073886080.0 + }, + "timestamp": "2025-09-10 02:38:44.124002", + "step": 1404, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029262211173772812, + "timestamp": "2025-09-10 02:38:44.142716", + "step": 1405, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:44.196660", + "step": 1405, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004903446417301893, + "timestamp": "2025-09-10 02:38:44.198840", + "step": 1406, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:44.253381", + "step": 1406, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007456667721271515, + "timestamp": "2025-09-10 02:38:44.255301", + "step": 1407, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:44.308243", + "step": 1407, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010532831773161888, + "timestamp": "2025-09-10 02:38:44.314077", + "step": 1408, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:44.366642", + "step": 1408, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020802823826670647, + "timestamp": "2025-09-10 02:38:44.368893", + "step": 1409, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:44.422932", + "step": 1409, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02434101514518261, + "timestamp": "2025-09-10 02:38:44.426192", + "step": 1410, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:44.479712", + "step": 1410, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014293080195784569, + "timestamp": "2025-09-10 02:38:44.482678", + "step": 1411, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:44.535776", + "step": 1411, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03320017457008362, + "timestamp": "2025-09-10 02:38:44.541568", + "step": 1412, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:44.593998", + "step": 1412, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025716817006468773, + "timestamp": "2025-09-10 02:38:44.595969", + "step": 1413, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:44.648682", + "step": 1413, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008264877833425999, + "timestamp": "2025-09-10 02:38:44.651056", + "step": 1414, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:44.704272", + "step": 1414, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01690601371228695, + "timestamp": "2025-09-10 02:38:44.707370", + "step": 1415, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:44.760953", + "step": 1415, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008486853912472725, + "timestamp": "2025-09-10 02:38:44.771182", + "step": 1416, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:44.825214", + "step": 1416, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013896196149289608, + "timestamp": "2025-09-10 02:38:44.827113", + "step": 1417, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:44.880160", + "step": 1417, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02865840122103691, + "timestamp": "2025-09-10 02:38:44.882280", + "step": 1418, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:44.935890", + "step": 1418, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00812804326415062, + "timestamp": "2025-09-10 02:38:44.943814", + "step": 1419, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:44.996775", + "step": 1419, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00727180065587163, + "timestamp": "2025-09-10 02:38:45.003571", + "step": 1420, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:45.055310", + "step": 1420, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007957945577800274, + "timestamp": "2025-09-10 02:38:45.058411", + "step": 1421, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:45.112729", + "step": 1421, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02688734047114849, + "timestamp": "2025-09-10 02:38:45.115101", + "step": 1422, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:45.168890", + "step": 1422, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05237075686454773, + "timestamp": "2025-09-10 02:38:45.170978", + "step": 1423, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:45.224712", + "step": 1423, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015660319477319717, + "timestamp": "2025-09-10 02:38:45.230725", + "step": 1424, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:45.283283", + "step": 1424, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0025016695726662874, + "timestamp": "2025-09-10 02:38:45.285497", + "step": 1425, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:45.338341", + "step": 1425, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016082679852843285, + "timestamp": "2025-09-10 02:38:45.340288", + "step": 1426, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:38:45.393394", + "step": 1426, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01544844452291727, + "timestamp": "2025-09-10 02:38:45.403037", + "step": 1427, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:45.456029", + "step": 1427, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01196068525314331, + "timestamp": "2025-09-10 02:38:45.461700", + "step": 1428, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:45.513865", + "step": 1428, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02642877586185932, + "timestamp": "2025-09-10 02:38:45.515772", + "step": 1429, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:45.568290", + "step": 1429, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01701239123940468, + "timestamp": "2025-09-10 02:38:45.570344", + "step": 1430, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 13120079713856.0 + }, + "timestamp": "2025-09-10 02:38:45.666795", + "step": 1430, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014857952482998371, + "timestamp": "2025-09-10 02:38:45.685328", + "step": 1431, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:45.738918", + "step": 1431, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024639811366796494, + "timestamp": "2025-09-10 02:38:45.745009", + "step": 1432, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:45.798548", + "step": 1432, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027349967509508133, + "timestamp": "2025-09-10 02:38:45.800634", + "step": 1433, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:45.854159", + "step": 1433, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023200364783406258, + "timestamp": "2025-09-10 02:38:45.856319", + "step": 1434, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:45.910691", + "step": 1434, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010355520062148571, + "timestamp": "2025-09-10 02:38:45.917579", + "step": 1435, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:45.972234", + "step": 1435, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009227151982486248, + "timestamp": "2025-09-10 02:38:45.979473", + "step": 1436, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:46.032272", + "step": 1436, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010424159467220306, + "timestamp": "2025-09-10 02:38:46.034531", + "step": 1437, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:38:46.088130", + "step": 1437, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01785074546933174, + "timestamp": "2025-09-10 02:38:46.090568", + "step": 1438, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:46.145242", + "step": 1438, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009487361647188663, + "timestamp": "2025-09-10 02:38:46.147263", + "step": 1439, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:46.200582", + "step": 1439, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009929628111422062, + "timestamp": "2025-09-10 02:38:46.207919", + "step": 1440, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:46.260240", + "step": 1440, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009848659858107567, + "timestamp": "2025-09-10 02:38:46.268460", + "step": 1441, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:46.322624", + "step": 1441, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01061181165277958, + "timestamp": "2025-09-10 02:38:46.325009", + "step": 1442, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:46.378347", + "step": 1442, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012384439818561077, + "timestamp": "2025-09-10 02:38:46.380457", + "step": 1443, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:46.433725", + "step": 1443, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014429338276386261, + "timestamp": "2025-09-10 02:38:46.439619", + "step": 1444, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:46.491949", + "step": 1444, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038031567819416523, + "timestamp": "2025-09-10 02:38:46.494063", + "step": 1445, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:46.547893", + "step": 1445, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008766873739659786, + "timestamp": "2025-09-10 02:38:46.549923", + "step": 1446, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:46.603260", + "step": 1446, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011563843116164207, + "timestamp": "2025-09-10 02:38:46.609676", + "step": 1447, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:38:46.678116", + "step": 1447, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014462672173976898, + "timestamp": "2025-09-10 02:38:46.691494", + "step": 1448, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:46.745088", + "step": 1448, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009715719148516655, + "timestamp": "2025-09-10 02:38:46.755591", + "step": 1449, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:46.809121", + "step": 1449, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012340018525719643, + "timestamp": "2025-09-10 02:38:46.815190", + "step": 1450, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:38:46.868235", + "step": 1450, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00776516692712903, + "timestamp": "2025-09-10 02:38:46.870326", + "step": 1451, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:38:46.924031", + "step": 1451, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015312569215893745, + "timestamp": "2025-09-10 02:38:46.930721", + "step": 1452, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:46.983891", + "step": 1452, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01902756094932556, + "timestamp": "2025-09-10 02:38:46.986056", + "step": 1453, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:47.040782", + "step": 1453, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013507343828678131, + "timestamp": "2025-09-10 02:38:47.050588", + "step": 1454, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:47.104232", + "step": 1454, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0173882395029068, + "timestamp": "2025-09-10 02:38:47.112223", + "step": 1455, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:47.165319", + "step": 1455, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010136603377759457, + "timestamp": "2025-09-10 02:38:47.171045", + "step": 1456, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:47.223749", + "step": 1456, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01279307622462511, + "timestamp": "2025-09-10 02:38:47.231499", + "step": 1457, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:47.285328", + "step": 1457, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010654272511601448, + "timestamp": "2025-09-10 02:38:47.287262", + "step": 1458, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:38:47.340376", + "step": 1458, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011478336527943611, + "timestamp": "2025-09-10 02:38:47.342443", + "step": 1459, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:38:47.396004", + "step": 1459, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03447409346699715, + "timestamp": "2025-09-10 02:38:47.401916", + "step": 1460, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:47.454169", + "step": 1460, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007755897007882595, + "timestamp": "2025-09-10 02:38:47.456112", + "step": 1461, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:38:47.509116", + "step": 1461, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017004257068037987, + "timestamp": "2025-09-10 02:38:47.511854", + "step": 1462, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:38:47.564823", + "step": 1462, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024744782596826553, + "timestamp": "2025-09-10 02:38:47.572785", + "step": 1463, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:47.626665", + "step": 1463, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010759426280856133, + "timestamp": "2025-09-10 02:38:47.633267", + "step": 1464, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:38:47.691053", + "step": 1464, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034266144037246704, + "timestamp": "2025-09-10 02:38:47.693184", + "step": 1465, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:38:47.767127", + "step": 1465, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028431007638573647, + "timestamp": "2025-09-10 02:38:47.780824", + "step": 1466, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:38:47.848015", + "step": 1466, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017007270827889442, + "timestamp": "2025-09-10 02:38:47.860188", + "step": 1467, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:38:47.914786", + "step": 1467, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008189934305846691, + "timestamp": "2025-09-10 02:38:47.920735", + "step": 1468, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:38:47.975536", + "step": 1468, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020122379064559937, + "timestamp": "2025-09-10 02:38:47.986031", + "step": 1469, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:38:48.058955", + "step": 1469, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027661120519042015, + "timestamp": "2025-09-10 02:38:48.072415", + "step": 1470, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:39:04.929026", + "step": 1470, + "epoch": 1 + }, + { + "type": "pplx", + "content": 21173319.91827306, + "timestamp": "2025-09-10 02:39:04.932880", + "step": 1470, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:04.993661", + "step": 1470, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022806530818343163, + "timestamp": "2025-09-10 02:39:04.998657", + "step": 1471, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 8960054460160.0 + }, + "timestamp": "2025-09-10 02:39:05.068544", + "step": 1471, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018918577581644058, + "timestamp": "2025-09-10 02:39:05.081934", + "step": 1472, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:05.137273", + "step": 1472, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004286808427423239, + "timestamp": "2025-09-10 02:39:05.139591", + "step": 1473, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:05.192617", + "step": 1473, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018037041649222374, + "timestamp": "2025-09-10 02:39:05.200378", + "step": 1474, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:05.255050", + "step": 1474, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007983874529600143, + "timestamp": "2025-09-10 02:39:05.264892", + "step": 1475, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:05.318467", + "step": 1475, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018270786851644516, + "timestamp": "2025-09-10 02:39:05.328878", + "step": 1476, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:05.385765", + "step": 1476, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039264630526304245, + "timestamp": "2025-09-10 02:39:05.388058", + "step": 1477, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:05.446577", + "step": 1477, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011250613257288933, + "timestamp": "2025-09-10 02:39:05.456202", + "step": 1478, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:05.510046", + "step": 1478, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013807500712573528, + "timestamp": "2025-09-10 02:39:05.512074", + "step": 1479, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:05.565778", + "step": 1479, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005317857023328543, + "timestamp": "2025-09-10 02:39:05.576176", + "step": 1480, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:05.628903", + "step": 1480, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006169492844492197, + "timestamp": "2025-09-10 02:39:05.637427", + "step": 1481, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:05.696385", + "step": 1481, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007268204353749752, + "timestamp": "2025-09-10 02:39:05.704041", + "step": 1482, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:05.759583", + "step": 1482, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0052519855089485645, + "timestamp": "2025-09-10 02:39:05.769153", + "step": 1483, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:05.822324", + "step": 1483, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002324948785826564, + "timestamp": "2025-09-10 02:39:05.828432", + "step": 1484, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:05.881169", + "step": 1484, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014736413955688477, + "timestamp": "2025-09-10 02:39:05.886820", + "step": 1485, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:05.942933", + "step": 1485, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007762397173792124, + "timestamp": "2025-09-10 02:39:05.944976", + "step": 1486, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:05.999591", + "step": 1486, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007638792973011732, + "timestamp": "2025-09-10 02:39:06.001888", + "step": 1487, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:06.054626", + "step": 1487, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020237712189555168, + "timestamp": "2025-09-10 02:39:06.062955", + "step": 1488, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:06.115814", + "step": 1488, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005117752123624086, + "timestamp": "2025-09-10 02:39:06.121374", + "step": 1489, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:39:06.187026", + "step": 1489, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018134128767997026, + "timestamp": "2025-09-10 02:39:06.198103", + "step": 1490, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:39:06.251161", + "step": 1490, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006517014931887388, + "timestamp": "2025-09-10 02:39:06.253516", + "step": 1491, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:06.306640", + "step": 1491, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018086465075612068, + "timestamp": "2025-09-10 02:39:06.312842", + "step": 1492, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:06.366823", + "step": 1492, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024628931656479836, + "timestamp": "2025-09-10 02:39:06.372224", + "step": 1493, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:06.426167", + "step": 1493, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01478238869458437, + "timestamp": "2025-09-10 02:39:06.428663", + "step": 1494, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:06.485621", + "step": 1494, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020374033600091934, + "timestamp": "2025-09-10 02:39:06.488179", + "step": 1495, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:06.542787", + "step": 1495, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0061873276717960835, + "timestamp": "2025-09-10 02:39:06.549333", + "step": 1496, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:06.603791", + "step": 1496, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014341110363602638, + "timestamp": "2025-09-10 02:39:06.612304", + "step": 1497, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:06.677774", + "step": 1497, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03801755979657173, + "timestamp": "2025-09-10 02:39:06.679765", + "step": 1498, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:06.732843", + "step": 1498, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012635404709726572, + "timestamp": "2025-09-10 02:39:06.734899", + "step": 1499, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:06.788983", + "step": 1499, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02247563563287258, + "timestamp": "2025-09-10 02:39:06.795243", + "step": 1500, + "epoch": 1 + }, + { + "type": "info", + "content": "Checkpoint saved at step 1500", + "timestamp": "2025-09-10 02:39:07.166577", + "step": 1500, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:07.222639", + "step": 1500, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012357601895928383, + "timestamp": "2025-09-10 02:39:07.224899", + "step": 1501, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:07.280752", + "step": 1501, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03305893391370773, + "timestamp": "2025-09-10 02:39:07.284600", + "step": 1502, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:07.339647", + "step": 1502, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006188906729221344, + "timestamp": "2025-09-10 02:39:07.341993", + "step": 1503, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:39:07.415637", + "step": 1503, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038431433495134115, + "timestamp": "2025-09-10 02:39:07.430043", + "step": 1504, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:07.495701", + "step": 1504, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027286021038889885, + "timestamp": "2025-09-10 02:39:07.497949", + "step": 1505, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:07.553504", + "step": 1505, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029850173741579056, + "timestamp": "2025-09-10 02:39:07.555623", + "step": 1506, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:07.608180", + "step": 1506, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019115885719656944, + "timestamp": "2025-09-10 02:39:07.610304", + "step": 1507, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:39:07.678674", + "step": 1507, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008542048744857311, + "timestamp": "2025-09-10 02:39:07.692159", + "step": 1508, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:07.791964", + "step": 1508, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011884269304573536, + "timestamp": "2025-09-10 02:39:07.799957", + "step": 1509, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:07.852964", + "step": 1509, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008217772468924522, + "timestamp": "2025-09-10 02:39:07.856834", + "step": 1510, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:07.910810", + "step": 1510, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011131289415061474, + "timestamp": "2025-09-10 02:39:07.913061", + "step": 1511, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:07.966523", + "step": 1511, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011722886934876442, + "timestamp": "2025-09-10 02:39:07.972634", + "step": 1512, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:08.025168", + "step": 1512, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011907918378710747, + "timestamp": "2025-09-10 02:39:08.027446", + "step": 1513, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:08.084439", + "step": 1513, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008978028781712055, + "timestamp": "2025-09-10 02:39:08.086830", + "step": 1514, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:08.139448", + "step": 1514, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00045794612378813326, + "timestamp": "2025-09-10 02:39:08.141892", + "step": 1515, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:08.196005", + "step": 1515, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011565404012799263, + "timestamp": "2025-09-10 02:39:08.206381", + "step": 1516, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:08.265451", + "step": 1516, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01076526939868927, + "timestamp": "2025-09-10 02:39:08.270835", + "step": 1517, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:08.329067", + "step": 1517, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018057377310469747, + "timestamp": "2025-09-10 02:39:08.335580", + "step": 1518, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:08.399503", + "step": 1518, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014335406012833118, + "timestamp": "2025-09-10 02:39:08.409284", + "step": 1519, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:08.462729", + "step": 1519, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009265930391848087, + "timestamp": "2025-09-10 02:39:08.468642", + "step": 1520, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:08.520442", + "step": 1520, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008001809939742088, + "timestamp": "2025-09-10 02:39:08.523454", + "step": 1521, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:08.576088", + "step": 1521, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0332258865237236, + "timestamp": "2025-09-10 02:39:08.578634", + "step": 1522, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:08.632707", + "step": 1522, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023234251886606216, + "timestamp": "2025-09-10 02:39:08.635230", + "step": 1523, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:08.689217", + "step": 1523, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002109512919560075, + "timestamp": "2025-09-10 02:39:08.694950", + "step": 1524, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:08.747772", + "step": 1524, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014900286681950092, + "timestamp": "2025-09-10 02:39:08.750635", + "step": 1525, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:39:08.803372", + "step": 1525, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007958031259477139, + "timestamp": "2025-09-10 02:39:08.805541", + "step": 1526, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:39:08.873478", + "step": 1526, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0024172370322048664, + "timestamp": "2025-09-10 02:39:08.886012", + "step": 1527, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:08.940427", + "step": 1527, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004101971630007029, + "timestamp": "2025-09-10 02:39:08.950830", + "step": 1528, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:09.003198", + "step": 1528, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01997605338692665, + "timestamp": "2025-09-10 02:39:09.005446", + "step": 1529, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:09.058375", + "step": 1529, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0052584256045520306, + "timestamp": "2025-09-10 02:39:09.060380", + "step": 1530, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:09.113259", + "step": 1530, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009853395633399487, + "timestamp": "2025-09-10 02:39:09.121363", + "step": 1531, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:09.175020", + "step": 1531, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007065530400723219, + "timestamp": "2025-09-10 02:39:09.180627", + "step": 1532, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:39:09.252326", + "step": 1532, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01046669390052557, + "timestamp": "2025-09-10 02:39:09.267266", + "step": 1533, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:09.320408", + "step": 1533, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01111555565148592, + "timestamp": "2025-09-10 02:39:09.326936", + "step": 1534, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:09.380736", + "step": 1534, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008722702972590923, + "timestamp": "2025-09-10 02:39:09.382947", + "step": 1535, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:09.435352", + "step": 1535, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06579513102769852, + "timestamp": "2025-09-10 02:39:09.441404", + "step": 1536, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:09.493662", + "step": 1536, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012011303333565593, + "timestamp": "2025-09-10 02:39:09.496198", + "step": 1537, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:09.550039", + "step": 1537, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02228461392223835, + "timestamp": "2025-09-10 02:39:09.552321", + "step": 1538, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:09.605023", + "step": 1538, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008895862847566605, + "timestamp": "2025-09-10 02:39:09.611537", + "step": 1539, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:09.664944", + "step": 1539, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022968510165810585, + "timestamp": "2025-09-10 02:39:09.670748", + "step": 1540, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:09.726879", + "step": 1540, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0167404692620039, + "timestamp": "2025-09-10 02:39:09.729142", + "step": 1541, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:09.782058", + "step": 1541, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039507247507572174, + "timestamp": "2025-09-10 02:39:09.790012", + "step": 1542, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:09.842983", + "step": 1542, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002729188185185194, + "timestamp": "2025-09-10 02:39:09.845109", + "step": 1543, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:09.898474", + "step": 1543, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0065974099561572075, + "timestamp": "2025-09-10 02:39:09.905776", + "step": 1544, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:39:09.964604", + "step": 1544, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0029151104390621185, + "timestamp": "2025-09-10 02:39:09.976127", + "step": 1545, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:10.029686", + "step": 1545, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009200065396726131, + "timestamp": "2025-09-10 02:39:10.031840", + "step": 1546, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:10.085454", + "step": 1546, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022537946701049805, + "timestamp": "2025-09-10 02:39:10.087666", + "step": 1547, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:10.140388", + "step": 1547, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033098287880420685, + "timestamp": "2025-09-10 02:39:10.147656", + "step": 1548, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:10.200256", + "step": 1548, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005940968636423349, + "timestamp": "2025-09-10 02:39:10.203017", + "step": 1549, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:10.256150", + "step": 1549, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009414262138307095, + "timestamp": "2025-09-10 02:39:10.258425", + "step": 1550, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:10.311554", + "step": 1550, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006693967618048191, + "timestamp": "2025-09-10 02:39:10.319683", + "step": 1551, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:10.372536", + "step": 1551, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012858742848038673, + "timestamp": "2025-09-10 02:39:10.378316", + "step": 1552, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:10.430599", + "step": 1552, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025713259354233742, + "timestamp": "2025-09-10 02:39:10.432831", + "step": 1553, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:10.486192", + "step": 1553, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004746002610772848, + "timestamp": "2025-09-10 02:39:10.488545", + "step": 1554, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:10.542130", + "step": 1554, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03677273541688919, + "timestamp": "2025-09-10 02:39:10.544321", + "step": 1555, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:10.599015", + "step": 1555, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001012719003483653, + "timestamp": "2025-09-10 02:39:10.609619", + "step": 1556, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:10.661416", + "step": 1556, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012111333198845387, + "timestamp": "2025-09-10 02:39:10.664596", + "step": 1557, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:10.718119", + "step": 1557, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029442133381962776, + "timestamp": "2025-09-10 02:39:10.720277", + "step": 1558, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:10.773200", + "step": 1558, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04144950583577156, + "timestamp": "2025-09-10 02:39:10.775539", + "step": 1559, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:10.827983", + "step": 1559, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002831295132637024, + "timestamp": "2025-09-10 02:39:10.833746", + "step": 1560, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:10.886531", + "step": 1560, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00786892231553793, + "timestamp": "2025-09-10 02:39:10.896155", + "step": 1561, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:10.950935", + "step": 1561, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005282655358314514, + "timestamp": "2025-09-10 02:39:10.960731", + "step": 1562, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:11.016101", + "step": 1562, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00401962548494339, + "timestamp": "2025-09-10 02:39:11.018230", + "step": 1563, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:11.071569", + "step": 1563, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02013194002211094, + "timestamp": "2025-09-10 02:39:11.077338", + "step": 1564, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:11.134995", + "step": 1564, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0010756131960079074, + "timestamp": "2025-09-10 02:39:11.137022", + "step": 1565, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:11.190643", + "step": 1565, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02974933385848999, + "timestamp": "2025-09-10 02:39:11.192905", + "step": 1566, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:11.245903", + "step": 1566, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027059292420744896, + "timestamp": "2025-09-10 02:39:11.253991", + "step": 1567, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:11.306693", + "step": 1567, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018480705097317696, + "timestamp": "2025-09-10 02:39:11.314956", + "step": 1568, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:39:11.376439", + "step": 1568, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009361452423036098, + "timestamp": "2025-09-10 02:39:11.387682", + "step": 1569, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:11.444613", + "step": 1569, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005706433672457933, + "timestamp": "2025-09-10 02:39:11.446796", + "step": 1570, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:11.499847", + "step": 1570, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03452344983816147, + "timestamp": "2025-09-10 02:39:11.502757", + "step": 1571, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:39:11.563123", + "step": 1571, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003737966064363718, + "timestamp": "2025-09-10 02:39:11.574681", + "step": 1572, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:11.631549", + "step": 1572, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029324105009436607, + "timestamp": "2025-09-10 02:39:11.641739", + "step": 1573, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:11.694550", + "step": 1573, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015068896114826202, + "timestamp": "2025-09-10 02:39:11.696718", + "step": 1574, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:11.750302", + "step": 1574, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01612308993935585, + "timestamp": "2025-09-10 02:39:11.752451", + "step": 1575, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:11.809118", + "step": 1575, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01985299587249756, + "timestamp": "2025-09-10 02:39:11.814821", + "step": 1576, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:11.867681", + "step": 1576, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04363846778869629, + "timestamp": "2025-09-10 02:39:11.869986", + "step": 1577, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:11.923163", + "step": 1577, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008180653676390648, + "timestamp": "2025-09-10 02:39:11.925313", + "step": 1578, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:39:11.978005", + "step": 1578, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004834081511944532, + "timestamp": "2025-09-10 02:39:11.980087", + "step": 1579, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:12.033006", + "step": 1579, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01860627345740795, + "timestamp": "2025-09-10 02:39:12.039060", + "step": 1580, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:12.091821", + "step": 1580, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015397860668599606, + "timestamp": "2025-09-10 02:39:12.094212", + "step": 1581, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:12.147194", + "step": 1581, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002805066527798772, + "timestamp": "2025-09-10 02:39:12.149379", + "step": 1582, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:12.203446", + "step": 1582, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05582290142774582, + "timestamp": "2025-09-10 02:39:12.213053", + "step": 1583, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:12.265965", + "step": 1583, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009821859188377857, + "timestamp": "2025-09-10 02:39:12.271530", + "step": 1584, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:12.323768", + "step": 1584, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027155477553606033, + "timestamp": "2025-09-10 02:39:12.332182", + "step": 1585, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:12.386791", + "step": 1585, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007664863485842943, + "timestamp": "2025-09-10 02:39:12.396561", + "step": 1586, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:12.449924", + "step": 1586, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007052138447761536, + "timestamp": "2025-09-10 02:39:12.452136", + "step": 1587, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:12.505073", + "step": 1587, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010261270217597485, + "timestamp": "2025-09-10 02:39:12.512315", + "step": 1588, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:12.564950", + "step": 1588, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0011753434082493186, + "timestamp": "2025-09-10 02:39:12.567149", + "step": 1589, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:12.620040", + "step": 1589, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005562249571084976, + "timestamp": "2025-09-10 02:39:12.628155", + "step": 1590, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:12.681399", + "step": 1590, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0065660723485052586, + "timestamp": "2025-09-10 02:39:12.688043", + "step": 1591, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:12.741044", + "step": 1591, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018280507996678352, + "timestamp": "2025-09-10 02:39:12.746801", + "step": 1592, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:39:12.798463", + "step": 1592, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006899405736476183, + "timestamp": "2025-09-10 02:39:12.800420", + "step": 1593, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:12.853795", + "step": 1593, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0059174662455916405, + "timestamp": "2025-09-10 02:39:12.863383", + "step": 1594, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:12.917096", + "step": 1594, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02331809140741825, + "timestamp": "2025-09-10 02:39:12.919352", + "step": 1595, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:12.972745", + "step": 1595, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017544277012348175, + "timestamp": "2025-09-10 02:39:12.978645", + "step": 1596, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:13.031055", + "step": 1596, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005049147177487612, + "timestamp": "2025-09-10 02:39:13.037712", + "step": 1597, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:13.091054", + "step": 1597, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0027481592260301113, + "timestamp": "2025-09-10 02:39:13.097713", + "step": 1598, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:13.154354", + "step": 1598, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003927871584892273, + "timestamp": "2025-09-10 02:39:13.162979", + "step": 1599, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:39:13.229797", + "step": 1599, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0011780605418607593, + "timestamp": "2025-09-10 02:39:13.241004", + "step": 1600, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:13.297617", + "step": 1600, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03326495364308357, + "timestamp": "2025-09-10 02:39:13.299841", + "step": 1601, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:13.357439", + "step": 1601, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029503485187888145, + "timestamp": "2025-09-10 02:39:13.367068", + "step": 1602, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:13.423063", + "step": 1602, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015550837852060795, + "timestamp": "2025-09-10 02:39:13.426067", + "step": 1603, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:13.481074", + "step": 1603, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008897035382688046, + "timestamp": "2025-09-10 02:39:13.491652", + "step": 1604, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:13.543628", + "step": 1604, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0055795880034565926, + "timestamp": "2025-09-10 02:39:13.546256", + "step": 1605, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:13.599530", + "step": 1605, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014626068994402885, + "timestamp": "2025-09-10 02:39:13.601798", + "step": 1606, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:13.654906", + "step": 1606, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02112056128680706, + "timestamp": "2025-09-10 02:39:13.657072", + "step": 1607, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:13.710612", + "step": 1607, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021492617204785347, + "timestamp": "2025-09-10 02:39:13.716397", + "step": 1608, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:13.770886", + "step": 1608, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008916768245398998, + "timestamp": "2025-09-10 02:39:13.773276", + "step": 1609, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:13.826737", + "step": 1609, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027872784063220024, + "timestamp": "2025-09-10 02:39:13.831432", + "step": 1610, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:13.885020", + "step": 1610, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009076380170881748, + "timestamp": "2025-09-10 02:39:13.887311", + "step": 1611, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:13.942930", + "step": 1611, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010817697271704674, + "timestamp": "2025-09-10 02:39:13.948775", + "step": 1612, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:39:14.007650", + "step": 1612, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01146281324326992, + "timestamp": "2025-09-10 02:39:14.019162", + "step": 1613, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:39:14.087202", + "step": 1613, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015638425946235657, + "timestamp": "2025-09-10 02:39:14.099801", + "step": 1614, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:14.153409", + "step": 1614, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029857028275728226, + "timestamp": "2025-09-10 02:39:14.155796", + "step": 1615, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:14.209191", + "step": 1615, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.045049529522657394, + "timestamp": "2025-09-10 02:39:14.216567", + "step": 1616, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:14.272337", + "step": 1616, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021982843056321144, + "timestamp": "2025-09-10 02:39:14.274565", + "step": 1617, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:39:31.144710", + "step": 1617, + "epoch": 1 + }, + { + "type": "pplx", + "content": 20237851.867195703, + "timestamp": "2025-09-10 02:39:31.147543", + "step": 1617, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:31.202665", + "step": 1617, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004158311523497105, + "timestamp": "2025-09-10 02:39:31.204597", + "step": 1618, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:31.259206", + "step": 1618, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0025820934679359198, + "timestamp": "2025-09-10 02:39:31.268231", + "step": 1619, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:31.322087", + "step": 1619, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013205154798924923, + "timestamp": "2025-09-10 02:39:31.328213", + "step": 1620, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:31.380908", + "step": 1620, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039854079484939575, + "timestamp": "2025-09-10 02:39:31.383660", + "step": 1621, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:31.439115", + "step": 1621, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03273060545325279, + "timestamp": "2025-09-10 02:39:31.448953", + "step": 1622, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:31.501939", + "step": 1622, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0293898768723011, + "timestamp": "2025-09-10 02:39:31.504850", + "step": 1623, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:31.561506", + "step": 1623, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003011903027072549, + "timestamp": "2025-09-10 02:39:31.568742", + "step": 1624, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:31.621285", + "step": 1624, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020315011963248253, + "timestamp": "2025-09-10 02:39:31.623264", + "step": 1625, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:31.676975", + "step": 1625, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02353397198021412, + "timestamp": "2025-09-10 02:39:31.686559", + "step": 1626, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:31.739782", + "step": 1626, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008481854572892189, + "timestamp": "2025-09-10 02:39:31.741705", + "step": 1627, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:31.794500", + "step": 1627, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01967434026300907, + "timestamp": "2025-09-10 02:39:31.800331", + "step": 1628, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:31.853077", + "step": 1628, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008929950185120106, + "timestamp": "2025-09-10 02:39:31.854901", + "step": 1629, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:31.907662", + "step": 1629, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00881776213645935, + "timestamp": "2025-09-10 02:39:31.914379", + "step": 1630, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:31.967709", + "step": 1630, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025654584169387817, + "timestamp": "2025-09-10 02:39:31.969708", + "step": 1631, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:32.022704", + "step": 1631, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016202174127101898, + "timestamp": "2025-09-10 02:39:32.030041", + "step": 1632, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:32.082321", + "step": 1632, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02773822471499443, + "timestamp": "2025-09-10 02:39:32.084353", + "step": 1633, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:32.137557", + "step": 1633, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011530165560543537, + "timestamp": "2025-09-10 02:39:32.143957", + "step": 1634, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:32.197255", + "step": 1634, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011132647283375263, + "timestamp": "2025-09-10 02:39:32.199367", + "step": 1635, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:32.252149", + "step": 1635, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0033992708195000887, + "timestamp": "2025-09-10 02:39:32.257880", + "step": 1636, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:32.309887", + "step": 1636, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009040266275405884, + "timestamp": "2025-09-10 02:39:32.312763", + "step": 1637, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:32.365504", + "step": 1637, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00423599686473608, + "timestamp": "2025-09-10 02:39:32.367561", + "step": 1638, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:32.420192", + "step": 1638, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017923900857567787, + "timestamp": "2025-09-10 02:39:32.422295", + "step": 1639, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:32.475045", + "step": 1639, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008748088032007217, + "timestamp": "2025-09-10 02:39:32.480658", + "step": 1640, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:32.533070", + "step": 1640, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02162143588066101, + "timestamp": "2025-09-10 02:39:32.539741", + "step": 1641, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:32.593385", + "step": 1641, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005233997944742441, + "timestamp": "2025-09-10 02:39:32.595508", + "step": 1642, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:32.649292", + "step": 1642, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009843496605753899, + "timestamp": "2025-09-10 02:39:32.658915", + "step": 1643, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:32.712233", + "step": 1643, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004377818200737238, + "timestamp": "2025-09-10 02:39:32.718032", + "step": 1644, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:32.770683", + "step": 1644, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011794110760092735, + "timestamp": "2025-09-10 02:39:32.772818", + "step": 1645, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:32.825302", + "step": 1645, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021618753671646118, + "timestamp": "2025-09-10 02:39:32.827345", + "step": 1646, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:32.880206", + "step": 1646, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005219961516559124, + "timestamp": "2025-09-10 02:39:32.882218", + "step": 1647, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:39:32.942213", + "step": 1647, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0140523137524724, + "timestamp": "2025-09-10 02:39:32.953728", + "step": 1648, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:33.006353", + "step": 1648, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028450939804315567, + "timestamp": "2025-09-10 02:39:33.008425", + "step": 1649, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:33.061216", + "step": 1649, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019210809841752052, + "timestamp": "2025-09-10 02:39:33.064343", + "step": 1650, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:33.117667", + "step": 1650, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025495637208223343, + "timestamp": "2025-09-10 02:39:33.119869", + "step": 1651, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:33.172776", + "step": 1651, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02168445847928524, + "timestamp": "2025-09-10 02:39:33.178488", + "step": 1652, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:39:33.242840", + "step": 1652, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017340829595923424, + "timestamp": "2025-09-10 02:39:33.256189", + "step": 1653, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:33.309346", + "step": 1653, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017994971945881844, + "timestamp": "2025-09-10 02:39:33.311417", + "step": 1654, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:33.364962", + "step": 1654, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018361778929829597, + "timestamp": "2025-09-10 02:39:33.367023", + "step": 1655, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:33.419925", + "step": 1655, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0625004991889, + "timestamp": "2025-09-10 02:39:33.425552", + "step": 1656, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:39:33.490265", + "step": 1656, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032270364463329315, + "timestamp": "2025-09-10 02:39:33.503640", + "step": 1657, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:39:33.576738", + "step": 1657, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020126575604081154, + "timestamp": "2025-09-10 02:39:33.590372", + "step": 1658, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:33.643712", + "step": 1658, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012262661941349506, + "timestamp": "2025-09-10 02:39:33.645935", + "step": 1659, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:33.698571", + "step": 1659, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005183726083487272, + "timestamp": "2025-09-10 02:39:33.704191", + "step": 1660, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:33.757428", + "step": 1660, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019909026101231575, + "timestamp": "2025-09-10 02:39:33.768057", + "step": 1661, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:33.822888", + "step": 1661, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008784397505223751, + "timestamp": "2025-09-10 02:39:33.832847", + "step": 1662, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 9920060287936.0 + }, + "timestamp": "2025-09-10 02:39:33.907307", + "step": 1662, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04029487445950508, + "timestamp": "2025-09-10 02:39:33.921417", + "step": 1663, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:33.975507", + "step": 1663, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010022074915468693, + "timestamp": "2025-09-10 02:39:33.986207", + "step": 1664, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:34.038418", + "step": 1664, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007610289845615625, + "timestamp": "2025-09-10 02:39:34.041879", + "step": 1665, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:34.095521", + "step": 1665, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026863878592848778, + "timestamp": "2025-09-10 02:39:34.097600", + "step": 1666, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:34.150305", + "step": 1666, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03311929479241371, + "timestamp": "2025-09-10 02:39:34.152318", + "step": 1667, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:34.205870", + "step": 1667, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0065115662291646, + "timestamp": "2025-09-10 02:39:34.216392", + "step": 1668, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:34.269169", + "step": 1668, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008997505530714989, + "timestamp": "2025-09-10 02:39:34.277411", + "step": 1669, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:34.331262", + "step": 1669, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.044347260147333145, + "timestamp": "2025-09-10 02:39:34.333373", + "step": 1670, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:34.386369", + "step": 1670, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011004636995494366, + "timestamp": "2025-09-10 02:39:34.388562", + "step": 1671, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:34.441406", + "step": 1671, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0005539219710044563, + "timestamp": "2025-09-10 02:39:34.447107", + "step": 1672, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:34.500026", + "step": 1672, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012371420860290527, + "timestamp": "2025-09-10 02:39:34.501973", + "step": 1673, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:34.554456", + "step": 1673, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0060769012197852135, + "timestamp": "2025-09-10 02:39:34.561210", + "step": 1674, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:34.613987", + "step": 1674, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02393592707812786, + "timestamp": "2025-09-10 02:39:34.616382", + "step": 1675, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:34.670850", + "step": 1675, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02526310458779335, + "timestamp": "2025-09-10 02:39:34.681391", + "step": 1676, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:34.733577", + "step": 1676, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019062532112002373, + "timestamp": "2025-09-10 02:39:34.735779", + "step": 1677, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:34.788288", + "step": 1677, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002217465080320835, + "timestamp": "2025-09-10 02:39:34.790501", + "step": 1678, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:34.843711", + "step": 1678, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01228619460016489, + "timestamp": "2025-09-10 02:39:34.852047", + "step": 1679, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:39:34.921439", + "step": 1679, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011879702098667622, + "timestamp": "2025-09-10 02:39:34.935087", + "step": 1680, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:34.989851", + "step": 1680, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005328523926436901, + "timestamp": "2025-09-10 02:39:34.993376", + "step": 1681, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:39:35.056976", + "step": 1681, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009572336450219154, + "timestamp": "2025-09-10 02:39:35.068201", + "step": 1682, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:35.121073", + "step": 1682, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010687570087611675, + "timestamp": "2025-09-10 02:39:35.123262", + "step": 1683, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:39:35.184615", + "step": 1683, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009572351351380348, + "timestamp": "2025-09-10 02:39:35.196463", + "step": 1684, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:35.248934", + "step": 1684, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015068850480020046, + "timestamp": "2025-09-10 02:39:35.250945", + "step": 1685, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:35.303915", + "step": 1685, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009619450196623802, + "timestamp": "2025-09-10 02:39:35.305878", + "step": 1686, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:35.358821", + "step": 1686, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0017892051255330443, + "timestamp": "2025-09-10 02:39:35.360992", + "step": 1687, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:39:35.418516", + "step": 1687, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019350871443748474, + "timestamp": "2025-09-10 02:39:35.429844", + "step": 1688, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:35.482007", + "step": 1688, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015114265494048595, + "timestamp": "2025-09-10 02:39:35.492291", + "step": 1689, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:35.545049", + "step": 1689, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004630051087588072, + "timestamp": "2025-09-10 02:39:35.551670", + "step": 1690, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:35.604738", + "step": 1690, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012441044673323631, + "timestamp": "2025-09-10 02:39:35.606871", + "step": 1691, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:35.659706", + "step": 1691, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029850082471966743, + "timestamp": "2025-09-10 02:39:35.665515", + "step": 1692, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:35.717869", + "step": 1692, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008027377538383007, + "timestamp": "2025-09-10 02:39:35.720100", + "step": 1693, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:35.772875", + "step": 1693, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02368379756808281, + "timestamp": "2025-09-10 02:39:35.775017", + "step": 1694, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:35.827947", + "step": 1694, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0030561552848666906, + "timestamp": "2025-09-10 02:39:35.835918", + "step": 1695, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:35.889956", + "step": 1695, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0022136729676276445, + "timestamp": "2025-09-10 02:39:35.896706", + "step": 1696, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:35.949631", + "step": 1696, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012324579060077667, + "timestamp": "2025-09-10 02:39:35.959894", + "step": 1697, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:36.014285", + "step": 1697, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012455495074391365, + "timestamp": "2025-09-10 02:39:36.016765", + "step": 1698, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:39:36.077896", + "step": 1698, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012829869985580444, + "timestamp": "2025-09-10 02:39:36.088611", + "step": 1699, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:39:36.157442", + "step": 1699, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013320697471499443, + "timestamp": "2025-09-10 02:39:36.170850", + "step": 1700, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:36.225323", + "step": 1700, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00998709350824356, + "timestamp": "2025-09-10 02:39:36.227706", + "step": 1701, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:36.281778", + "step": 1701, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019189154729247093, + "timestamp": "2025-09-10 02:39:36.284144", + "step": 1702, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:39:36.342815", + "step": 1702, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024400973692536354, + "timestamp": "2025-09-10 02:39:36.353247", + "step": 1703, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:36.408251", + "step": 1703, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019246473908424377, + "timestamp": "2025-09-10 02:39:36.414245", + "step": 1704, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:36.467729", + "step": 1704, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012863265350461006, + "timestamp": "2025-09-10 02:39:36.473639", + "step": 1705, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:36.527103", + "step": 1705, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016453877091407776, + "timestamp": "2025-09-10 02:39:36.529161", + "step": 1706, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:36.581847", + "step": 1706, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020454296842217445, + "timestamp": "2025-09-10 02:39:36.584054", + "step": 1707, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:36.638675", + "step": 1707, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009613310918211937, + "timestamp": "2025-09-10 02:39:36.649233", + "step": 1708, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:36.703466", + "step": 1708, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005098339635878801, + "timestamp": "2025-09-10 02:39:36.705472", + "step": 1709, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:36.759205", + "step": 1709, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02107381820678711, + "timestamp": "2025-09-10 02:39:36.765243", + "step": 1710, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:36.820206", + "step": 1710, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049266356974840164, + "timestamp": "2025-09-10 02:39:36.822400", + "step": 1711, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:36.875529", + "step": 1711, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019013753160834312, + "timestamp": "2025-09-10 02:39:36.882011", + "step": 1712, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:39:36.941354", + "step": 1712, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01836700178682804, + "timestamp": "2025-09-10 02:39:36.952867", + "step": 1713, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:37.006906", + "step": 1713, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016758916899561882, + "timestamp": "2025-09-10 02:39:37.008907", + "step": 1714, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:37.062799", + "step": 1714, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005026537459343672, + "timestamp": "2025-09-10 02:39:37.065149", + "step": 1715, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:37.118921", + "step": 1715, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014079469256103039, + "timestamp": "2025-09-10 02:39:37.125540", + "step": 1716, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:37.177893", + "step": 1716, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007057299371808767, + "timestamp": "2025-09-10 02:39:37.180205", + "step": 1717, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:39:37.260507", + "step": 1717, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008493524976074696, + "timestamp": "2025-09-10 02:39:37.275566", + "step": 1718, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:37.329096", + "step": 1718, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012126578018069267, + "timestamp": "2025-09-10 02:39:37.331508", + "step": 1719, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:39:37.388093", + "step": 1719, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007698687259107828, + "timestamp": "2025-09-10 02:39:37.397870", + "step": 1720, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:39:37.451161", + "step": 1720, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017780693247914314, + "timestamp": "2025-09-10 02:39:37.453186", + "step": 1721, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:37.507210", + "step": 1721, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0079504968598485, + "timestamp": "2025-09-10 02:39:37.514984", + "step": 1722, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:37.568490", + "step": 1722, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020733432844281197, + "timestamp": "2025-09-10 02:39:37.570888", + "step": 1723, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:39:37.632456", + "step": 1723, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014720803126692772, + "timestamp": "2025-09-10 02:39:37.644327", + "step": 1724, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:37.696949", + "step": 1724, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011866986751556396, + "timestamp": "2025-09-10 02:39:37.703558", + "step": 1725, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:37.758577", + "step": 1725, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010004202835261822, + "timestamp": "2025-09-10 02:39:37.761446", + "step": 1726, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:39:37.822358", + "step": 1726, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011330981738865376, + "timestamp": "2025-09-10 02:39:37.833221", + "step": 1727, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:37.888160", + "step": 1727, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03494782745838165, + "timestamp": "2025-09-10 02:39:37.894168", + "step": 1728, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:37.946601", + "step": 1728, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011625164188444614, + "timestamp": "2025-09-10 02:39:37.948882", + "step": 1729, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:38.001641", + "step": 1729, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004412360489368439, + "timestamp": "2025-09-10 02:39:38.003882", + "step": 1730, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:38.057813", + "step": 1730, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011759593151509762, + "timestamp": "2025-09-10 02:39:38.059865", + "step": 1731, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:38.113391", + "step": 1731, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009773151949048042, + "timestamp": "2025-09-10 02:39:38.119527", + "step": 1732, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:39:38.186000", + "step": 1732, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.055666714906692505, + "timestamp": "2025-09-10 02:39:38.199622", + "step": 1733, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:38.253956", + "step": 1733, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006420874036848545, + "timestamp": "2025-09-10 02:39:38.256360", + "step": 1734, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:38.309605", + "step": 1734, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01741214655339718, + "timestamp": "2025-09-10 02:39:38.312342", + "step": 1735, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:38.365530", + "step": 1735, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020671477541327477, + "timestamp": "2025-09-10 02:39:38.371634", + "step": 1736, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:38.423940", + "step": 1736, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03731471672654152, + "timestamp": "2025-09-10 02:39:38.426052", + "step": 1737, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:38.479661", + "step": 1737, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003349814098328352, + "timestamp": "2025-09-10 02:39:38.481872", + "step": 1738, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:39:38.539854", + "step": 1738, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011552114970982075, + "timestamp": "2025-09-10 02:39:38.550337", + "step": 1739, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:38.604696", + "step": 1739, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004306585993617773, + "timestamp": "2025-09-10 02:39:38.610919", + "step": 1740, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:38.663627", + "step": 1740, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003052855608984828, + "timestamp": "2025-09-10 02:39:38.666353", + "step": 1741, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:38.721349", + "step": 1741, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002096347976475954, + "timestamp": "2025-09-10 02:39:38.723475", + "step": 1742, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 12160073886080.0 + }, + "timestamp": "2025-09-10 02:39:38.814954", + "step": 1742, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02477678470313549, + "timestamp": "2025-09-10 02:39:38.832036", + "step": 1743, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:38.885364", + "step": 1743, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004138125106692314, + "timestamp": "2025-09-10 02:39:38.891484", + "step": 1744, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:38.944356", + "step": 1744, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0016858422895893455, + "timestamp": "2025-09-10 02:39:38.954382", + "step": 1745, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:39.008325", + "step": 1745, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005374787840992212, + "timestamp": "2025-09-10 02:39:39.010380", + "step": 1746, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:39.065980", + "step": 1746, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0028152461163699627, + "timestamp": "2025-09-10 02:39:39.067847", + "step": 1747, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:39.121350", + "step": 1747, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0034364103339612484, + "timestamp": "2025-09-10 02:39:39.127249", + "step": 1748, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:39.180242", + "step": 1748, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012752421200275421, + "timestamp": "2025-09-10 02:39:39.182328", + "step": 1749, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:39.235189", + "step": 1749, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008162710815668106, + "timestamp": "2025-09-10 02:39:39.237377", + "step": 1750, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 8960054460160.0 + }, + "timestamp": "2025-09-10 02:39:39.307226", + "step": 1750, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013295854441821575, + "timestamp": "2025-09-10 02:39:39.320126", + "step": 1751, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:39:39.380805", + "step": 1751, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014668701216578484, + "timestamp": "2025-09-10 02:39:39.392436", + "step": 1752, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:39.445908", + "step": 1752, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007448400370776653, + "timestamp": "2025-09-10 02:39:39.452376", + "step": 1753, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:39.506197", + "step": 1753, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01767665334045887, + "timestamp": "2025-09-10 02:39:39.512771", + "step": 1754, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:39.566734", + "step": 1754, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002524305135011673, + "timestamp": "2025-09-10 02:39:39.569733", + "step": 1755, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:39.623244", + "step": 1755, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022315729409456253, + "timestamp": "2025-09-10 02:39:39.629361", + "step": 1756, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:39.682071", + "step": 1756, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004506973084062338, + "timestamp": "2025-09-10 02:39:39.684245", + "step": 1757, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:39.737964", + "step": 1757, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031227421015501022, + "timestamp": "2025-09-10 02:39:39.739827", + "step": 1758, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:39.792790", + "step": 1758, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01290964987128973, + "timestamp": "2025-09-10 02:39:39.794797", + "step": 1759, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:39.848931", + "step": 1759, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008333380334079266, + "timestamp": "2025-09-10 02:39:39.857203", + "step": 1760, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:39.910184", + "step": 1760, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005028809420764446, + "timestamp": "2025-09-10 02:39:39.912481", + "step": 1761, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:39.966345", + "step": 1761, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0037227787543088198, + "timestamp": "2025-09-10 02:39:39.975990", + "step": 1762, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:40.029373", + "step": 1762, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010441971011459827, + "timestamp": "2025-09-10 02:39:40.031721", + "step": 1763, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:40.084630", + "step": 1763, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009730793535709381, + "timestamp": "2025-09-10 02:39:40.092140", + "step": 1764, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:39:57.278370", + "step": 1764, + "epoch": 1 + }, + { + "type": "pplx", + "content": 23444422.734878693, + "timestamp": "2025-09-10 02:39:57.281336", + "step": 1764, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:57.335376", + "step": 1764, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007947385311126709, + "timestamp": "2025-09-10 02:39:57.337325", + "step": 1765, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:57.391345", + "step": 1765, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01799796335399151, + "timestamp": "2025-09-10 02:39:57.393363", + "step": 1766, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:57.447909", + "step": 1766, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011859401129186153, + "timestamp": "2025-09-10 02:39:57.453227", + "step": 1767, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:57.506581", + "step": 1767, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00828345213085413, + "timestamp": "2025-09-10 02:39:57.512553", + "step": 1768, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:57.565353", + "step": 1768, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0010933061130344868, + "timestamp": "2025-09-10 02:39:57.567446", + "step": 1769, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:57.620634", + "step": 1769, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012491731904447079, + "timestamp": "2025-09-10 02:39:57.624032", + "step": 1770, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:57.678784", + "step": 1770, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026579495519399643, + "timestamp": "2025-09-10 02:39:57.680934", + "step": 1771, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:57.734443", + "step": 1771, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004869979806244373, + "timestamp": "2025-09-10 02:39:57.740255", + "step": 1772, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:57.792629", + "step": 1772, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008269052952528, + "timestamp": "2025-09-10 02:39:57.802681", + "step": 1773, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:57.856097", + "step": 1773, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0023649216163903475, + "timestamp": "2025-09-10 02:39:57.858283", + "step": 1774, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:39:57.912210", + "step": 1774, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013124794699251652, + "timestamp": "2025-09-10 02:39:57.914418", + "step": 1775, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:57.967996", + "step": 1775, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005433118902146816, + "timestamp": "2025-09-10 02:39:57.974281", + "step": 1776, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:39:58.031112", + "step": 1776, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036824531853199005, + "timestamp": "2025-09-10 02:39:58.042321", + "step": 1777, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:58.096448", + "step": 1777, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009437480941414833, + "timestamp": "2025-09-10 02:39:58.104293", + "step": 1778, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:58.157966", + "step": 1778, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06587214767932892, + "timestamp": "2025-09-10 02:39:58.160157", + "step": 1779, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:58.213369", + "step": 1779, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034657519310712814, + "timestamp": "2025-09-10 02:39:58.222235", + "step": 1780, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:39:58.282402", + "step": 1780, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008408496156334877, + "timestamp": "2025-09-10 02:39:58.294371", + "step": 1781, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:58.347901", + "step": 1781, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036837959196418524, + "timestamp": "2025-09-10 02:39:58.355985", + "step": 1782, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:58.408962", + "step": 1782, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023239104077219963, + "timestamp": "2025-09-10 02:39:58.411021", + "step": 1783, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:39:58.477186", + "step": 1783, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.058617573231458664, + "timestamp": "2025-09-10 02:39:58.490214", + "step": 1784, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:58.543189", + "step": 1784, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013705234043300152, + "timestamp": "2025-09-10 02:39:58.551000", + "step": 1785, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:58.604011", + "step": 1785, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005106168333441019, + "timestamp": "2025-09-10 02:39:58.605991", + "step": 1786, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:58.658833", + "step": 1786, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0134356077760458, + "timestamp": "2025-09-10 02:39:58.660883", + "step": 1787, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:39:58.713037", + "step": 1787, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013329902663826942, + "timestamp": "2025-09-10 02:39:58.719041", + "step": 1788, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:58.771308", + "step": 1788, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005628393497318029, + "timestamp": "2025-09-10 02:39:58.773603", + "step": 1789, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:39:58.826065", + "step": 1789, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008878232911229134, + "timestamp": "2025-09-10 02:39:58.828323", + "step": 1790, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:39:58.894544", + "step": 1790, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010674390010535717, + "timestamp": "2025-09-10 02:39:58.906803", + "step": 1791, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:39:58.959780", + "step": 1791, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006005376577377319, + "timestamp": "2025-09-10 02:39:58.967076", + "step": 1792, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:39:59.027161", + "step": 1792, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037848882377147675, + "timestamp": "2025-09-10 02:39:59.039181", + "step": 1793, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:39:59.092192", + "step": 1793, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007080061826854944, + "timestamp": "2025-09-10 02:39:59.094541", + "step": 1794, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:59.147247", + "step": 1794, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008358953520655632, + "timestamp": "2025-09-10 02:39:59.149286", + "step": 1795, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:59.202119", + "step": 1795, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023781998082995415, + "timestamp": "2025-09-10 02:39:59.207752", + "step": 1796, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:59.260042", + "step": 1796, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00034054036950692534, + "timestamp": "2025-09-10 02:39:59.262180", + "step": 1797, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:59.315466", + "step": 1797, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028512893244624138, + "timestamp": "2025-09-10 02:39:59.317577", + "step": 1798, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:39:59.370398", + "step": 1798, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019109424203634262, + "timestamp": "2025-09-10 02:39:59.372704", + "step": 1799, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:39:59.426479", + "step": 1799, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024009615182876587, + "timestamp": "2025-09-10 02:39:59.436829", + "step": 1800, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:39:59.493523", + "step": 1800, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030184131115674973, + "timestamp": "2025-09-10 02:39:59.504708", + "step": 1801, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:39:59.562638", + "step": 1801, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018377343658357859, + "timestamp": "2025-09-10 02:39:59.573076", + "step": 1802, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:39:59.626122", + "step": 1802, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0016195416683331132, + "timestamp": "2025-09-10 02:39:59.629578", + "step": 1803, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:59.685345", + "step": 1803, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007264415267854929, + "timestamp": "2025-09-10 02:39:59.691284", + "step": 1804, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:59.743371", + "step": 1804, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014265798963606358, + "timestamp": "2025-09-10 02:39:59.745575", + "step": 1805, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:39:59.798488", + "step": 1805, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002825483214110136, + "timestamp": "2025-09-10 02:39:59.800974", + "step": 1806, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:39:59.858687", + "step": 1806, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0029959457460790873, + "timestamp": "2025-09-10 02:39:59.869117", + "step": 1807, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:39:59.922112", + "step": 1807, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002854627091437578, + "timestamp": "2025-09-10 02:39:59.927924", + "step": 1808, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:39:59.980147", + "step": 1808, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0220880676060915, + "timestamp": "2025-09-10 02:39:59.988483", + "step": 1809, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:00.041041", + "step": 1809, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03752187639474869, + "timestamp": "2025-09-10 02:40:00.047598", + "step": 1810, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:00.100387", + "step": 1810, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002775424625724554, + "timestamp": "2025-09-10 02:40:00.102557", + "step": 1811, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:00.155872", + "step": 1811, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006643721368163824, + "timestamp": "2025-09-10 02:40:00.166226", + "step": 1812, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:00.218967", + "step": 1812, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0041780597530305386, + "timestamp": "2025-09-10 02:40:00.221115", + "step": 1813, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:00.274015", + "step": 1813, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01680164411664009, + "timestamp": "2025-09-10 02:40:00.280499", + "step": 1814, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:00.333624", + "step": 1814, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005910966079682112, + "timestamp": "2025-09-10 02:40:00.343266", + "step": 1815, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:40:00.396359", + "step": 1815, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012250907719135284, + "timestamp": "2025-09-10 02:40:00.402129", + "step": 1816, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:40:00.461660", + "step": 1816, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02061128430068493, + "timestamp": "2025-09-10 02:40:00.473284", + "step": 1817, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:00.526428", + "step": 1817, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017202133312821388, + "timestamp": "2025-09-10 02:40:00.533004", + "step": 1818, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:00.585773", + "step": 1818, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0474618636071682, + "timestamp": "2025-09-10 02:40:00.587777", + "step": 1819, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:00.640253", + "step": 1819, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010387985035777092, + "timestamp": "2025-09-10 02:40:00.649286", + "step": 1820, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:00.700953", + "step": 1820, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004137419629842043, + "timestamp": "2025-09-10 02:40:00.703352", + "step": 1821, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:40:00.765449", + "step": 1821, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007008475251495838, + "timestamp": "2025-09-10 02:40:00.776555", + "step": 1822, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:00.829935", + "step": 1822, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02086186222732067, + "timestamp": "2025-09-10 02:40:00.831950", + "step": 1823, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:00.886211", + "step": 1823, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02719251997768879, + "timestamp": "2025-09-10 02:40:00.896806", + "step": 1824, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:00.952178", + "step": 1824, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014970486052334309, + "timestamp": "2025-09-10 02:40:00.955634", + "step": 1825, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:01.008770", + "step": 1825, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038791872560977936, + "timestamp": "2025-09-10 02:40:01.011973", + "step": 1826, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:01.065077", + "step": 1826, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015346644446253777, + "timestamp": "2025-09-10 02:40:01.067218", + "step": 1827, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:40:01.119835", + "step": 1827, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030021127313375473, + "timestamp": "2025-09-10 02:40:01.125503", + "step": 1828, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:01.177691", + "step": 1828, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013160435482859612, + "timestamp": "2025-09-10 02:40:01.179834", + "step": 1829, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:01.233121", + "step": 1829, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00401655025780201, + "timestamp": "2025-09-10 02:40:01.239295", + "step": 1830, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:01.291948", + "step": 1830, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012732602190226316, + "timestamp": "2025-09-10 02:40:01.294242", + "step": 1831, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:01.347306", + "step": 1831, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01764599420130253, + "timestamp": "2025-09-10 02:40:01.353190", + "step": 1832, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:01.405447", + "step": 1832, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007014024071395397, + "timestamp": "2025-09-10 02:40:01.407781", + "step": 1833, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:01.460412", + "step": 1833, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013560867868363857, + "timestamp": "2025-09-10 02:40:01.463280", + "step": 1834, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:01.516631", + "step": 1834, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004703877028077841, + "timestamp": "2025-09-10 02:40:01.519566", + "step": 1835, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:01.572773", + "step": 1835, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.047076888382434845, + "timestamp": "2025-09-10 02:40:01.579803", + "step": 1836, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:01.632375", + "step": 1836, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0168437696993351, + "timestamp": "2025-09-10 02:40:01.634776", + "step": 1837, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:01.688465", + "step": 1837, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009931335225701332, + "timestamp": "2025-09-10 02:40:01.698060", + "step": 1838, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:01.750606", + "step": 1838, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025910858064889908, + "timestamp": "2025-09-10 02:40:01.752595", + "step": 1839, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:01.805252", + "step": 1839, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036237796302884817, + "timestamp": "2025-09-10 02:40:01.815577", + "step": 1840, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:01.867781", + "step": 1840, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010877500288188457, + "timestamp": "2025-09-10 02:40:01.870018", + "step": 1841, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:01.922919", + "step": 1841, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010570277459919453, + "timestamp": "2025-09-10 02:40:01.924997", + "step": 1842, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:01.977444", + "step": 1842, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010649800300598145, + "timestamp": "2025-09-10 02:40:01.980429", + "step": 1843, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:02.033051", + "step": 1843, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013479354791343212, + "timestamp": "2025-09-10 02:40:02.038913", + "step": 1844, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:02.090901", + "step": 1844, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01640235260128975, + "timestamp": "2025-09-10 02:40:02.093181", + "step": 1845, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:02.145508", + "step": 1845, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013565384782850742, + "timestamp": "2025-09-10 02:40:02.148507", + "step": 1846, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:02.201624", + "step": 1846, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007320315111428499, + "timestamp": "2025-09-10 02:40:02.209608", + "step": 1847, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:40:02.270017", + "step": 1847, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014638463035225868, + "timestamp": "2025-09-10 02:40:02.281513", + "step": 1848, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:02.334573", + "step": 1848, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021155206486582756, + "timestamp": "2025-09-10 02:40:02.337117", + "step": 1849, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:02.390112", + "step": 1849, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01687222719192505, + "timestamp": "2025-09-10 02:40:02.392457", + "step": 1850, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:02.445413", + "step": 1850, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02835913561284542, + "timestamp": "2025-09-10 02:40:02.447776", + "step": 1851, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:40:02.515530", + "step": 1851, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021335987374186516, + "timestamp": "2025-09-10 02:40:02.528911", + "step": 1852, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 848 + ], + "flops": 16960103024960.0 + }, + "timestamp": "2025-09-10 02:40:02.649967", + "step": 1852, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0033114091493189335, + "timestamp": "2025-09-10 02:40:02.676240", + "step": 1853, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:02.730670", + "step": 1853, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008411731570959091, + "timestamp": "2025-09-10 02:40:02.740484", + "step": 1854, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:02.793419", + "step": 1854, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018416091799736023, + "timestamp": "2025-09-10 02:40:02.795596", + "step": 1855, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:02.848384", + "step": 1855, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010702029801905155, + "timestamp": "2025-09-10 02:40:02.854418", + "step": 1856, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:02.912254", + "step": 1856, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01161882746964693, + "timestamp": "2025-09-10 02:40:02.918883", + "step": 1857, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:02.973696", + "step": 1857, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01350684929639101, + "timestamp": "2025-09-10 02:40:02.975500", + "step": 1858, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:40:03.039442", + "step": 1858, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019679781049489975, + "timestamp": "2025-09-10 02:40:03.046144", + "step": 1859, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:40:03.107187", + "step": 1859, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02340617962181568, + "timestamp": "2025-09-10 02:40:03.119020", + "step": 1860, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:03.171773", + "step": 1860, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012109608389437199, + "timestamp": "2025-09-10 02:40:03.174030", + "step": 1861, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:40:03.249546", + "step": 1861, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008661939762532711, + "timestamp": "2025-09-10 02:40:03.263599", + "step": 1862, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:40:03.337949", + "step": 1862, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0033197938464581966, + "timestamp": "2025-09-10 02:40:03.350635", + "step": 1863, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:03.403715", + "step": 1863, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016732338815927505, + "timestamp": "2025-09-10 02:40:03.409746", + "step": 1864, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:03.466538", + "step": 1864, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0185225922614336, + "timestamp": "2025-09-10 02:40:03.477041", + "step": 1865, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:03.530585", + "step": 1865, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0076630981639027596, + "timestamp": "2025-09-10 02:40:03.533488", + "step": 1866, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:03.586623", + "step": 1866, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02388727478682995, + "timestamp": "2025-09-10 02:40:03.594743", + "step": 1867, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:03.647676", + "step": 1867, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019633090123534203, + "timestamp": "2025-09-10 02:40:03.656575", + "step": 1868, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:03.708791", + "step": 1868, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02341005578637123, + "timestamp": "2025-09-10 02:40:03.710816", + "step": 1869, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:03.768692", + "step": 1869, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007239409722387791, + "timestamp": "2025-09-10 02:40:03.779152", + "step": 1870, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:03.832114", + "step": 1870, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011209309101104736, + "timestamp": "2025-09-10 02:40:03.834269", + "step": 1871, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:03.887175", + "step": 1871, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032616887241601944, + "timestamp": "2025-09-10 02:40:03.894601", + "step": 1872, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:03.947072", + "step": 1872, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00542307598516345, + "timestamp": "2025-09-10 02:40:03.949371", + "step": 1873, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:40:04.001493", + "step": 1873, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004950906150043011, + "timestamp": "2025-09-10 02:40:04.003633", + "step": 1874, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:40:04.070033", + "step": 1874, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014805043116211891, + "timestamp": "2025-09-10 02:40:04.082230", + "step": 1875, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:04.135464", + "step": 1875, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005124398972839117, + "timestamp": "2025-09-10 02:40:04.141116", + "step": 1876, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:04.194098", + "step": 1876, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01372546423226595, + "timestamp": "2025-09-10 02:40:04.204618", + "step": 1877, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:04.257718", + "step": 1877, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05634238198399544, + "timestamp": "2025-09-10 02:40:04.264139", + "step": 1878, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:04.317267", + "step": 1878, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01247911062091589, + "timestamp": "2025-09-10 02:40:04.319533", + "step": 1879, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:40:04.385503", + "step": 1879, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00997505895793438, + "timestamp": "2025-09-10 02:40:04.398540", + "step": 1880, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:04.451137", + "step": 1880, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008485841564834118, + "timestamp": "2025-09-10 02:40:04.453921", + "step": 1881, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:04.507481", + "step": 1881, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021304922178387642, + "timestamp": "2025-09-10 02:40:04.509806", + "step": 1882, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:40:04.570944", + "step": 1882, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01408555917441845, + "timestamp": "2025-09-10 02:40:04.581616", + "step": 1883, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:04.635445", + "step": 1883, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01440160907804966, + "timestamp": "2025-09-10 02:40:04.640963", + "step": 1884, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:04.694230", + "step": 1884, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004871371667832136, + "timestamp": "2025-09-10 02:40:04.695981", + "step": 1885, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:04.750987", + "step": 1885, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02186504192650318, + "timestamp": "2025-09-10 02:40:04.760776", + "step": 1886, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:04.814094", + "step": 1886, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006802795920521021, + "timestamp": "2025-09-10 02:40:04.816099", + "step": 1887, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:04.868759", + "step": 1887, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010723703540861607, + "timestamp": "2025-09-10 02:40:04.874705", + "step": 1888, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:04.926605", + "step": 1888, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019888320937752724, + "timestamp": "2025-09-10 02:40:04.929725", + "step": 1889, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:04.982402", + "step": 1889, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012764276005327702, + "timestamp": "2025-09-10 02:40:04.984675", + "step": 1890, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:05.037489", + "step": 1890, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004333410877734423, + "timestamp": "2025-09-10 02:40:05.040329", + "step": 1891, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:05.093020", + "step": 1891, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036067774053663015, + "timestamp": "2025-09-10 02:40:05.098544", + "step": 1892, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:05.151069", + "step": 1892, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00459138723090291, + "timestamp": "2025-09-10 02:40:05.154046", + "step": 1893, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 13120079713856.0 + }, + "timestamp": "2025-09-10 02:40:05.250242", + "step": 1893, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014671608805656433, + "timestamp": "2025-09-10 02:40:05.268685", + "step": 1894, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:05.323673", + "step": 1894, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010200425051152706, + "timestamp": "2025-09-10 02:40:05.331230", + "step": 1895, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:05.384682", + "step": 1895, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019077172502875328, + "timestamp": "2025-09-10 02:40:05.390882", + "step": 1896, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:40:05.457493", + "step": 1896, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007850597612559795, + "timestamp": "2025-09-10 02:40:05.471113", + "step": 1897, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:05.524315", + "step": 1897, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0023193396627902985, + "timestamp": "2025-09-10 02:40:05.526465", + "step": 1898, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:05.579958", + "step": 1898, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006891167256981134, + "timestamp": "2025-09-10 02:40:05.582890", + "step": 1899, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:05.636906", + "step": 1899, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008234263397753239, + "timestamp": "2025-09-10 02:40:05.647293", + "step": 1900, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:40:05.710951", + "step": 1900, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036044667940586805, + "timestamp": "2025-09-10 02:40:05.722705", + "step": 1901, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:05.776641", + "step": 1901, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001770059927366674, + "timestamp": "2025-09-10 02:40:05.786234", + "step": 1902, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:05.840202", + "step": 1902, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038121454417705536, + "timestamp": "2025-09-10 02:40:05.846217", + "step": 1903, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:40:05.906715", + "step": 1903, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0055883280001580715, + "timestamp": "2025-09-10 02:40:05.918246", + "step": 1904, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:05.971992", + "step": 1904, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016487419605255127, + "timestamp": "2025-09-10 02:40:05.974858", + "step": 1905, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:06.027284", + "step": 1905, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01049624290317297, + "timestamp": "2025-09-10 02:40:06.029292", + "step": 1906, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:06.083699", + "step": 1906, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0060119954869151115, + "timestamp": "2025-09-10 02:40:06.093534", + "step": 1907, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:06.146765", + "step": 1907, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00729665532708168, + "timestamp": "2025-09-10 02:40:06.154015", + "step": 1908, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:40:06.206271", + "step": 1908, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005614326801151037, + "timestamp": "2025-09-10 02:40:06.208039", + "step": 1909, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:06.260623", + "step": 1909, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018541625468060374, + "timestamp": "2025-09-10 02:40:06.262517", + "step": 1910, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:06.315959", + "step": 1910, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00960423331707716, + "timestamp": "2025-09-10 02:40:06.322350", + "step": 1911, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:40:23.158724", + "step": 1911, + "epoch": 1 + }, + { + "type": "pplx", + "content": 24056173.55783693, + "timestamp": "2025-09-10 02:40:23.161872", + "step": 1911, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:23.218843", + "step": 1911, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014825215563178062, + "timestamp": "2025-09-10 02:40:23.230026", + "step": 1912, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:23.283028", + "step": 1912, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014569023624062538, + "timestamp": "2025-09-10 02:40:23.285459", + "step": 1913, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:23.338469", + "step": 1913, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03071923367679119, + "timestamp": "2025-09-10 02:40:23.340508", + "step": 1914, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:23.393905", + "step": 1914, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011707932688295841, + "timestamp": "2025-09-10 02:40:23.395971", + "step": 1915, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:40:23.456336", + "step": 1915, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03220689669251442, + "timestamp": "2025-09-10 02:40:23.467856", + "step": 1916, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:23.522949", + "step": 1916, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017585042864084244, + "timestamp": "2025-09-10 02:40:23.524832", + "step": 1917, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:23.578339", + "step": 1917, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01379451435059309, + "timestamp": "2025-09-10 02:40:23.580389", + "step": 1918, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:23.634096", + "step": 1918, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018169602379202843, + "timestamp": "2025-09-10 02:40:23.639976", + "step": 1919, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:23.693777", + "step": 1919, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022639570757746696, + "timestamp": "2025-09-10 02:40:23.699886", + "step": 1920, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:23.752216", + "step": 1920, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005521832965314388, + "timestamp": "2025-09-10 02:40:23.754985", + "step": 1921, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:40:23.816751", + "step": 1921, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004009797237813473, + "timestamp": "2025-09-10 02:40:23.827864", + "step": 1922, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:23.880482", + "step": 1922, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04552547261118889, + "timestamp": "2025-09-10 02:40:23.882414", + "step": 1923, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:23.934892", + "step": 1923, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02368207462131977, + "timestamp": "2025-09-10 02:40:23.940653", + "step": 1924, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:23.993454", + "step": 1924, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005057979375123978, + "timestamp": "2025-09-10 02:40:24.003498", + "step": 1925, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:24.058857", + "step": 1925, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01649058423936367, + "timestamp": "2025-09-10 02:40:24.061064", + "step": 1926, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:24.114324", + "step": 1926, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012464964762330055, + "timestamp": "2025-09-10 02:40:24.116376", + "step": 1927, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:24.169759", + "step": 1927, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01559925265610218, + "timestamp": "2025-09-10 02:40:24.175670", + "step": 1928, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:24.227904", + "step": 1928, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029018137603998184, + "timestamp": "2025-09-10 02:40:24.238166", + "step": 1929, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:24.293016", + "step": 1929, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0177223589271307, + "timestamp": "2025-09-10 02:40:24.302753", + "step": 1930, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:24.355894", + "step": 1930, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0032372507266700268, + "timestamp": "2025-09-10 02:40:24.362554", + "step": 1931, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:24.415006", + "step": 1931, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0042731501162052155, + "timestamp": "2025-09-10 02:40:24.424257", + "step": 1932, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:24.476856", + "step": 1932, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017299598082900047, + "timestamp": "2025-09-10 02:40:24.483564", + "step": 1933, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:24.537636", + "step": 1933, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02613774500787258, + "timestamp": "2025-09-10 02:40:24.544334", + "step": 1934, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:24.597695", + "step": 1934, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027100956067442894, + "timestamp": "2025-09-10 02:40:24.599828", + "step": 1935, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:24.652294", + "step": 1935, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012455842457711697, + "timestamp": "2025-09-10 02:40:24.658209", + "step": 1936, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:24.724219", + "step": 1936, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005860353820025921, + "timestamp": "2025-09-10 02:40:24.734452", + "step": 1937, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:24.788113", + "step": 1937, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001896485686302185, + "timestamp": "2025-09-10 02:40:24.797731", + "step": 1938, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:24.851069", + "step": 1938, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02183610573410988, + "timestamp": "2025-09-10 02:40:24.853067", + "step": 1939, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:24.906646", + "step": 1939, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005698530934751034, + "timestamp": "2025-09-10 02:40:24.912313", + "step": 1940, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:24.964092", + "step": 1940, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002761783543974161, + "timestamp": "2025-09-10 02:40:24.965800", + "step": 1941, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:25.018164", + "step": 1941, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03263257071375847, + "timestamp": "2025-09-10 02:40:25.020267", + "step": 1942, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:25.073043", + "step": 1942, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00514122424647212, + "timestamp": "2025-09-10 02:40:25.074908", + "step": 1943, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:25.127574", + "step": 1943, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003783545223996043, + "timestamp": "2025-09-10 02:40:25.134944", + "step": 1944, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:25.186780", + "step": 1944, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0033572230022400618, + "timestamp": "2025-09-10 02:40:25.188884", + "step": 1945, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:25.241947", + "step": 1945, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0178757943212986, + "timestamp": "2025-09-10 02:40:25.248543", + "step": 1946, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:25.301799", + "step": 1946, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01767478697001934, + "timestamp": "2025-09-10 02:40:25.311395", + "step": 1947, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:25.365027", + "step": 1947, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03325825557112694, + "timestamp": "2025-09-10 02:40:25.370599", + "step": 1948, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:25.422800", + "step": 1948, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008630207739770412, + "timestamp": "2025-09-10 02:40:25.425028", + "step": 1949, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:25.478012", + "step": 1949, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012927801348268986, + "timestamp": "2025-09-10 02:40:25.480097", + "step": 1950, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:25.532402", + "step": 1950, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01747622713446617, + "timestamp": "2025-09-10 02:40:25.534547", + "step": 1951, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:25.586979", + "step": 1951, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00799343641847372, + "timestamp": "2025-09-10 02:40:25.593033", + "step": 1952, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:25.645898", + "step": 1952, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006082185078412294, + "timestamp": "2025-09-10 02:40:25.652239", + "step": 1953, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:25.706314", + "step": 1953, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004440020304173231, + "timestamp": "2025-09-10 02:40:25.708843", + "step": 1954, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:25.761772", + "step": 1954, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002935823518782854, + "timestamp": "2025-09-10 02:40:25.768105", + "step": 1955, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:25.820739", + "step": 1955, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009733036160469055, + "timestamp": "2025-09-10 02:40:25.826670", + "step": 1956, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:40:25.887079", + "step": 1956, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01457192562520504, + "timestamp": "2025-09-10 02:40:25.898838", + "step": 1957, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:25.957224", + "step": 1957, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005866926163434982, + "timestamp": "2025-09-10 02:40:25.967657", + "step": 1958, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:26.020790", + "step": 1958, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014265580102801323, + "timestamp": "2025-09-10 02:40:26.023624", + "step": 1959, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:26.077558", + "step": 1959, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020798334851861, + "timestamp": "2025-09-10 02:40:26.087950", + "step": 1960, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:26.140441", + "step": 1960, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002828251337632537, + "timestamp": "2025-09-10 02:40:26.142792", + "step": 1961, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:26.195785", + "step": 1961, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038729310035705566, + "timestamp": "2025-09-10 02:40:26.198704", + "step": 1962, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:26.252061", + "step": 1962, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007973147556185722, + "timestamp": "2025-09-10 02:40:26.254226", + "step": 1963, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:26.308028", + "step": 1963, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01782756857573986, + "timestamp": "2025-09-10 02:40:26.314847", + "step": 1964, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:26.368500", + "step": 1964, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009090019389986992, + "timestamp": "2025-09-10 02:40:26.370564", + "step": 1965, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:26.424040", + "step": 1965, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01522249635308981, + "timestamp": "2025-09-10 02:40:26.426215", + "step": 1966, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:26.480652", + "step": 1966, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010693386197090149, + "timestamp": "2025-09-10 02:40:26.490543", + "step": 1967, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:26.543914", + "step": 1967, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0013078663032501936, + "timestamp": "2025-09-10 02:40:26.550095", + "step": 1968, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:26.602813", + "step": 1968, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02390674129128456, + "timestamp": "2025-09-10 02:40:26.604947", + "step": 1969, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:26.657694", + "step": 1969, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01774417981505394, + "timestamp": "2025-09-10 02:40:26.659845", + "step": 1970, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:26.712806", + "step": 1970, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02107955515384674, + "timestamp": "2025-09-10 02:40:26.714926", + "step": 1971, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:26.768454", + "step": 1971, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011796504259109497, + "timestamp": "2025-09-10 02:40:26.778935", + "step": 1972, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:40:26.831230", + "step": 1972, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006210298743098974, + "timestamp": "2025-09-10 02:40:26.833224", + "step": 1973, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:40:26.894962", + "step": 1973, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010983224026858807, + "timestamp": "2025-09-10 02:40:26.906057", + "step": 1974, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:26.959127", + "step": 1974, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0164656862616539, + "timestamp": "2025-09-10 02:40:26.961333", + "step": 1975, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:27.014441", + "step": 1975, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0045163072645664215, + "timestamp": "2025-09-10 02:40:27.020596", + "step": 1976, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:40:27.087791", + "step": 1976, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01173480600118637, + "timestamp": "2025-09-10 02:40:27.101499", + "step": 1977, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:27.155148", + "step": 1977, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05862661823630333, + "timestamp": "2025-09-10 02:40:27.157976", + "step": 1978, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:27.211676", + "step": 1978, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00327744847163558, + "timestamp": "2025-09-10 02:40:27.217681", + "step": 1979, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:27.270452", + "step": 1979, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0009931438835337758, + "timestamp": "2025-09-10 02:40:27.276350", + "step": 1980, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:27.328971", + "step": 1980, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0451321043074131, + "timestamp": "2025-09-10 02:40:27.331008", + "step": 1981, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:27.383610", + "step": 1981, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018608104437589645, + "timestamp": "2025-09-10 02:40:27.385689", + "step": 1982, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 8960054460160.0 + }, + "timestamp": "2025-09-10 02:40:27.455604", + "step": 1982, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005149955861270428, + "timestamp": "2025-09-10 02:40:27.468476", + "step": 1983, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:27.520993", + "step": 1983, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04596984386444092, + "timestamp": "2025-09-10 02:40:27.526992", + "step": 1984, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:27.579735", + "step": 1984, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007791085634380579, + "timestamp": "2025-09-10 02:40:27.581620", + "step": 1985, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:27.634521", + "step": 1985, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009980211034417152, + "timestamp": "2025-09-10 02:40:27.636600", + "step": 1986, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:27.689577", + "step": 1986, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02932649478316307, + "timestamp": "2025-09-10 02:40:27.691837", + "step": 1987, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:27.745076", + "step": 1987, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004920269828289747, + "timestamp": "2025-09-10 02:40:27.751098", + "step": 1988, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:27.803776", + "step": 1988, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007863566279411316, + "timestamp": "2025-09-10 02:40:27.805711", + "step": 1989, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:27.858536", + "step": 1989, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014759792014956474, + "timestamp": "2025-09-10 02:40:27.865030", + "step": 1990, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:27.918231", + "step": 1990, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00929697509855032, + "timestamp": "2025-09-10 02:40:27.920521", + "step": 1991, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:27.975027", + "step": 1991, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01565462164580822, + "timestamp": "2025-09-10 02:40:27.985619", + "step": 1992, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:28.037768", + "step": 1992, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017104636877775192, + "timestamp": "2025-09-10 02:40:28.039982", + "step": 1993, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:28.093153", + "step": 1993, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002158787567168474, + "timestamp": "2025-09-10 02:40:28.095125", + "step": 1994, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:28.148034", + "step": 1994, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0019636712968349457, + "timestamp": "2025-09-10 02:40:28.150230", + "step": 1995, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:28.203161", + "step": 1995, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014361539855599403, + "timestamp": "2025-09-10 02:40:28.209025", + "step": 1996, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:28.261114", + "step": 1996, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005031212233006954, + "timestamp": "2025-09-10 02:40:28.264074", + "step": 1997, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:28.316944", + "step": 1997, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001798151759430766, + "timestamp": "2025-09-10 02:40:28.323540", + "step": 1998, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:28.376162", + "step": 1998, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004470630548894405, + "timestamp": "2025-09-10 02:40:28.378097", + "step": 1999, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:28.431539", + "step": 1999, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014826876111328602, + "timestamp": "2025-09-10 02:40:28.441936", + "step": 2000, + "epoch": 1 + }, + { + "type": "info", + "content": "Checkpoint saved at step 2000", + "timestamp": "2025-09-10 02:40:28.806360", + "step": 2000, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:28.861348", + "step": 2000, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005624215584248304, + "timestamp": "2025-09-10 02:40:28.869124", + "step": 2001, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:28.922948", + "step": 2001, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006075866054743528, + "timestamp": "2025-09-10 02:40:28.924936", + "step": 2002, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:28.977993", + "step": 2002, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004470535088330507, + "timestamp": "2025-09-10 02:40:28.980104", + "step": 2003, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:29.032984", + "step": 2003, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014739048667252064, + "timestamp": "2025-09-10 02:40:29.039098", + "step": 2004, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:29.092326", + "step": 2004, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0006459683063440025, + "timestamp": "2025-09-10 02:40:29.102913", + "step": 2005, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:29.155962", + "step": 2005, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007557640317827463, + "timestamp": "2025-09-10 02:40:29.158049", + "step": 2006, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:29.211024", + "step": 2006, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03876524418592453, + "timestamp": "2025-09-10 02:40:29.213043", + "step": 2007, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:29.266270", + "step": 2007, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018814915092661977, + "timestamp": "2025-09-10 02:40:29.274961", + "step": 2008, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:29.327012", + "step": 2008, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06593555212020874, + "timestamp": "2025-09-10 02:40:29.333537", + "step": 2009, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:29.386951", + "step": 2009, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0020403151866048574, + "timestamp": "2025-09-10 02:40:29.389811", + "step": 2010, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:40:29.442137", + "step": 2010, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0033842516131699085, + "timestamp": "2025-09-10 02:40:29.444278", + "step": 2011, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:29.497009", + "step": 2011, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00728357071056962, + "timestamp": "2025-09-10 02:40:29.502992", + "step": 2012, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:29.555775", + "step": 2012, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03079916164278984, + "timestamp": "2025-09-10 02:40:29.557790", + "step": 2013, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:29.610985", + "step": 2013, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07625376433134079, + "timestamp": "2025-09-10 02:40:29.612951", + "step": 2014, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:29.666191", + "step": 2014, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001167031587101519, + "timestamp": "2025-09-10 02:40:29.668384", + "step": 2015, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:29.721156", + "step": 2015, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014314286410808563, + "timestamp": "2025-09-10 02:40:29.727091", + "step": 2016, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:29.779107", + "step": 2016, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014441040344536304, + "timestamp": "2025-09-10 02:40:29.781155", + "step": 2017, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:29.833680", + "step": 2017, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008695382624864578, + "timestamp": "2025-09-10 02:40:29.836827", + "step": 2018, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:29.889332", + "step": 2018, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016935424879193306, + "timestamp": "2025-09-10 02:40:29.891605", + "step": 2019, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:29.944543", + "step": 2019, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0068209171295166016, + "timestamp": "2025-09-10 02:40:29.950429", + "step": 2020, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:30.003480", + "step": 2020, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02026510424911976, + "timestamp": "2025-09-10 02:40:30.010980", + "step": 2021, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:40:30.071495", + "step": 2021, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01842624880373478, + "timestamp": "2025-09-10 02:40:30.082184", + "step": 2022, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:40:30.143131", + "step": 2022, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033158693462610245, + "timestamp": "2025-09-10 02:40:30.153815", + "step": 2023, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:30.207737", + "step": 2023, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0011084345169365406, + "timestamp": "2025-09-10 02:40:30.214780", + "step": 2024, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:40:30.273442", + "step": 2024, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004500050563365221, + "timestamp": "2025-09-10 02:40:30.285031", + "step": 2025, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:30.338633", + "step": 2025, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00649598240852356, + "timestamp": "2025-09-10 02:40:30.341661", + "step": 2026, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:30.395178", + "step": 2026, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008045542985200882, + "timestamp": "2025-09-10 02:40:30.397350", + "step": 2027, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:30.450292", + "step": 2027, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009977241978049278, + "timestamp": "2025-09-10 02:40:30.456240", + "step": 2028, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:30.508720", + "step": 2028, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005319902207702398, + "timestamp": "2025-09-10 02:40:30.510784", + "step": 2029, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:30.563647", + "step": 2029, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.054678674787282944, + "timestamp": "2025-09-10 02:40:30.565900", + "step": 2030, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:30.618888", + "step": 2030, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019794004037976265, + "timestamp": "2025-09-10 02:40:30.621846", + "step": 2031, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:30.679506", + "step": 2031, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0349082425236702, + "timestamp": "2025-09-10 02:40:30.690856", + "step": 2032, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:30.745018", + "step": 2032, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0052823289297521114, + "timestamp": "2025-09-10 02:40:30.748108", + "step": 2033, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:30.802787", + "step": 2033, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01749844290316105, + "timestamp": "2025-09-10 02:40:30.812501", + "step": 2034, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:30.866184", + "step": 2034, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008537224493920803, + "timestamp": "2025-09-10 02:40:30.868578", + "step": 2035, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:40:30.948942", + "step": 2035, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012161684222519398, + "timestamp": "2025-09-10 02:40:30.964786", + "step": 2036, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:31.017952", + "step": 2036, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015352214686572552, + "timestamp": "2025-09-10 02:40:31.019928", + "step": 2037, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:31.077785", + "step": 2037, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012421314604580402, + "timestamp": "2025-09-10 02:40:31.088226", + "step": 2038, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:31.141507", + "step": 2038, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014564496465027332, + "timestamp": "2025-09-10 02:40:31.143756", + "step": 2039, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:31.197445", + "step": 2039, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024082129821181297, + "timestamp": "2025-09-10 02:40:31.203492", + "step": 2040, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:31.256660", + "step": 2040, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014478149823844433, + "timestamp": "2025-09-10 02:40:31.258781", + "step": 2041, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:40:31.320304", + "step": 2041, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019619962200522423, + "timestamp": "2025-09-10 02:40:31.331245", + "step": 2042, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:31.384981", + "step": 2042, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006779780145734549, + "timestamp": "2025-09-10 02:40:31.387119", + "step": 2043, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:40:31.454926", + "step": 2043, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0027434879448264837, + "timestamp": "2025-09-10 02:40:31.468292", + "step": 2044, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:31.526393", + "step": 2044, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0017053645569831133, + "timestamp": "2025-09-10 02:40:31.537583", + "step": 2045, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:31.591233", + "step": 2045, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014673621393740177, + "timestamp": "2025-09-10 02:40:31.593385", + "step": 2046, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:31.646329", + "step": 2046, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01856462098658085, + "timestamp": "2025-09-10 02:40:31.648813", + "step": 2047, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:31.702231", + "step": 2047, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017966721206903458, + "timestamp": "2025-09-10 02:40:31.709367", + "step": 2048, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:31.762069", + "step": 2048, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023420805111527443, + "timestamp": "2025-09-10 02:40:31.764310", + "step": 2049, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:31.817365", + "step": 2049, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026248861104249954, + "timestamp": "2025-09-10 02:40:31.819460", + "step": 2050, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:31.877177", + "step": 2050, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03295387700200081, + "timestamp": "2025-09-10 02:40:31.887597", + "step": 2051, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:31.941811", + "step": 2051, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014977223239839077, + "timestamp": "2025-09-10 02:40:31.947854", + "step": 2052, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:32.000312", + "step": 2052, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0006909515359438956, + "timestamp": "2025-09-10 02:40:32.002452", + "step": 2053, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:32.055476", + "step": 2053, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0030724534299224615, + "timestamp": "2025-09-10 02:40:32.063602", + "step": 2054, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:32.117521", + "step": 2054, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009656290523707867, + "timestamp": "2025-09-10 02:40:32.119540", + "step": 2055, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:32.172823", + "step": 2055, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026681674644351006, + "timestamp": "2025-09-10 02:40:32.178727", + "step": 2056, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:32.231883", + "step": 2056, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012981426902115345, + "timestamp": "2025-09-10 02:40:32.242015", + "step": 2057, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:32.296889", + "step": 2057, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006506530102342367, + "timestamp": "2025-09-10 02:40:32.302560", + "step": 2058, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:40:49.113422", + "step": 2058, + "epoch": 1 + }, + { + "type": "pplx", + "content": 26482598.556206435, + "timestamp": "2025-09-10 02:40:49.116552", + "step": 2058, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:49.172178", + "step": 2058, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024873873218894005, + "timestamp": "2025-09-10 02:40:49.175258", + "step": 2059, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:49.233670", + "step": 2059, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0064423284493386745, + "timestamp": "2025-09-10 02:40:49.244883", + "step": 2060, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:49.298280", + "step": 2060, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037066757678985596, + "timestamp": "2025-09-10 02:40:49.307988", + "step": 2061, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:49.362062", + "step": 2061, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008213195949792862, + "timestamp": "2025-09-10 02:40:49.364425", + "step": 2062, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:49.417815", + "step": 2062, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021275460720062256, + "timestamp": "2025-09-10 02:40:49.424527", + "step": 2063, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:49.481192", + "step": 2063, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011230670846998692, + "timestamp": "2025-09-10 02:40:49.487036", + "step": 2064, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:49.539236", + "step": 2064, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006780553143471479, + "timestamp": "2025-09-10 02:40:49.544108", + "step": 2065, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:49.601060", + "step": 2065, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007425476796925068, + "timestamp": "2025-09-10 02:40:49.608155", + "step": 2066, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:49.665936", + "step": 2066, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021900055930018425, + "timestamp": "2025-09-10 02:40:49.675526", + "step": 2067, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:49.730962", + "step": 2067, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004908385220915079, + "timestamp": "2025-09-10 02:40:49.741371", + "step": 2068, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:49.814731", + "step": 2068, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011295017786324024, + "timestamp": "2025-09-10 02:40:49.817684", + "step": 2069, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:49.872430", + "step": 2069, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003216175129637122, + "timestamp": "2025-09-10 02:40:49.880617", + "step": 2070, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:49.935081", + "step": 2070, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0031660795211791992, + "timestamp": "2025-09-10 02:40:49.941930", + "step": 2071, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:49.996444", + "step": 2071, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006437338888645172, + "timestamp": "2025-09-10 02:40:50.013093", + "step": 2072, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:50.069767", + "step": 2072, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020404119044542313, + "timestamp": "2025-09-10 02:40:50.076080", + "step": 2073, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:50.138601", + "step": 2073, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004126450512558222, + "timestamp": "2025-09-10 02:40:50.148246", + "step": 2074, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:40:50.228133", + "step": 2074, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0178743414580822, + "timestamp": "2025-09-10 02:40:50.238820", + "step": 2075, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:40:50.294582", + "step": 2075, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01718183420598507, + "timestamp": "2025-09-10 02:40:50.302841", + "step": 2076, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:40:50.373769", + "step": 2076, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0029403779190033674, + "timestamp": "2025-09-10 02:40:50.376717", + "step": 2077, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:50.449271", + "step": 2077, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005355836357921362, + "timestamp": "2025-09-10 02:40:50.453516", + "step": 2078, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:50.516547", + "step": 2078, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007841928862035275, + "timestamp": "2025-09-10 02:40:50.520189", + "step": 2079, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:40:50.597780", + "step": 2079, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0016193120973184705, + "timestamp": "2025-09-10 02:40:50.612645", + "step": 2080, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:50.665594", + "step": 2080, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008064589463174343, + "timestamp": "2025-09-10 02:40:50.668611", + "step": 2081, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:50.721966", + "step": 2081, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026570020243525505, + "timestamp": "2025-09-10 02:40:50.724252", + "step": 2082, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:50.777902", + "step": 2082, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021081626415252686, + "timestamp": "2025-09-10 02:40:50.787544", + "step": 2083, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:50.841123", + "step": 2083, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00983034260571003, + "timestamp": "2025-09-10 02:40:50.846721", + "step": 2084, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:40:50.911533", + "step": 2084, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014769394882023335, + "timestamp": "2025-09-10 02:40:50.924732", + "step": 2085, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:50.978880", + "step": 2085, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0013401007745414972, + "timestamp": "2025-09-10 02:40:50.981036", + "step": 2086, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:51.034679", + "step": 2086, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007707037031650543, + "timestamp": "2025-09-10 02:40:51.036886", + "step": 2087, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:40:51.098767", + "step": 2087, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002891720738261938, + "timestamp": "2025-09-10 02:40:51.110646", + "step": 2088, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:51.164562", + "step": 2088, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04081105440855026, + "timestamp": "2025-09-10 02:40:51.166698", + "step": 2089, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:51.221682", + "step": 2089, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035205941647291183, + "timestamp": "2025-09-10 02:40:51.231520", + "step": 2090, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:40:51.297941", + "step": 2090, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004805359989404678, + "timestamp": "2025-09-10 02:40:51.310208", + "step": 2091, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:51.363441", + "step": 2091, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009850825183093548, + "timestamp": "2025-09-10 02:40:51.369467", + "step": 2092, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:40:51.434848", + "step": 2092, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0668928474187851, + "timestamp": "2025-09-10 02:40:51.448062", + "step": 2093, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:51.501565", + "step": 2093, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021095644682645798, + "timestamp": "2025-09-10 02:40:51.504762", + "step": 2094, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:51.557952", + "step": 2094, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02470650151371956, + "timestamp": "2025-09-10 02:40:51.566111", + "step": 2095, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:51.619223", + "step": 2095, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015062669292092323, + "timestamp": "2025-09-10 02:40:51.625078", + "step": 2096, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:51.677955", + "step": 2096, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010145655833184719, + "timestamp": "2025-09-10 02:40:51.684519", + "step": 2097, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:51.737896", + "step": 2097, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008013242855668068, + "timestamp": "2025-09-10 02:40:51.740021", + "step": 2098, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:51.793229", + "step": 2098, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018540335586294532, + "timestamp": "2025-09-10 02:40:51.802810", + "step": 2099, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:51.856652", + "step": 2099, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008650798350572586, + "timestamp": "2025-09-10 02:40:51.865483", + "step": 2100, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:51.917795", + "step": 2100, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00515222130343318, + "timestamp": "2025-09-10 02:40:51.920953", + "step": 2101, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:51.975155", + "step": 2101, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009928855113685131, + "timestamp": "2025-09-10 02:40:51.977422", + "step": 2102, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:52.031787", + "step": 2102, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013099177740514278, + "timestamp": "2025-09-10 02:40:52.034052", + "step": 2103, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:52.086780", + "step": 2103, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017769407480955124, + "timestamp": "2025-09-10 02:40:52.095945", + "step": 2104, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:52.152661", + "step": 2104, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0073226215317845345, + "timestamp": "2025-09-10 02:40:52.163885", + "step": 2105, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:52.217115", + "step": 2105, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03404330834746361, + "timestamp": "2025-09-10 02:40:52.219416", + "step": 2106, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:52.277853", + "step": 2106, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021174529567360878, + "timestamp": "2025-09-10 02:40:52.288285", + "step": 2107, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:52.341070", + "step": 2107, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013351579196751118, + "timestamp": "2025-09-10 02:40:52.346624", + "step": 2108, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:52.398571", + "step": 2108, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015658928314223886, + "timestamp": "2025-09-10 02:40:52.405076", + "step": 2109, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:52.458803", + "step": 2109, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005374718923121691, + "timestamp": "2025-09-10 02:40:52.466848", + "step": 2110, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:52.525315", + "step": 2110, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016732919961214066, + "timestamp": "2025-09-10 02:40:52.535758", + "step": 2111, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:52.589115", + "step": 2111, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006754300557076931, + "timestamp": "2025-09-10 02:40:52.595902", + "step": 2112, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:52.648472", + "step": 2112, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011038624681532383, + "timestamp": "2025-09-10 02:40:52.651259", + "step": 2113, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:52.704536", + "step": 2113, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0231216661632061, + "timestamp": "2025-09-10 02:40:52.706882", + "step": 2114, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:52.760117", + "step": 2114, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022676901891827583, + "timestamp": "2025-09-10 02:40:52.762541", + "step": 2115, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:52.816024", + "step": 2115, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01557923387736082, + "timestamp": "2025-09-10 02:40:52.821875", + "step": 2116, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:52.874489", + "step": 2116, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00846847239881754, + "timestamp": "2025-09-10 02:40:52.876937", + "step": 2117, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:52.930537", + "step": 2117, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02981100045144558, + "timestamp": "2025-09-10 02:40:52.932808", + "step": 2118, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:40:52.986485", + "step": 2118, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022590333595871925, + "timestamp": "2025-09-10 02:40:52.988714", + "step": 2119, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:53.042318", + "step": 2119, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030419545248150826, + "timestamp": "2025-09-10 02:40:53.052714", + "step": 2120, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:53.105946", + "step": 2120, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03492049500346184, + "timestamp": "2025-09-10 02:40:53.107948", + "step": 2121, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:53.161050", + "step": 2121, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015666289255023003, + "timestamp": "2025-09-10 02:40:53.163443", + "step": 2122, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:53.216595", + "step": 2122, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01425112783908844, + "timestamp": "2025-09-10 02:40:53.223277", + "step": 2123, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:53.276776", + "step": 2123, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012315441854298115, + "timestamp": "2025-09-10 02:40:53.282606", + "step": 2124, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:53.335064", + "step": 2124, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.059362031519412994, + "timestamp": "2025-09-10 02:40:53.343410", + "step": 2125, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:53.398609", + "step": 2125, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007058224640786648, + "timestamp": "2025-09-10 02:40:53.400668", + "step": 2126, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:53.453569", + "step": 2126, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010253772139549255, + "timestamp": "2025-09-10 02:40:53.455797", + "step": 2127, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:53.508779", + "step": 2127, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001817534677684307, + "timestamp": "2025-09-10 02:40:53.516151", + "step": 2128, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:53.568888", + "step": 2128, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022460997104644775, + "timestamp": "2025-09-10 02:40:53.571191", + "step": 2129, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:53.624430", + "step": 2129, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015433979220688343, + "timestamp": "2025-09-10 02:40:53.626651", + "step": 2130, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:53.679649", + "step": 2130, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007861977443099022, + "timestamp": "2025-09-10 02:40:53.681933", + "step": 2131, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:53.734473", + "step": 2131, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018494803458452225, + "timestamp": "2025-09-10 02:40:53.741861", + "step": 2132, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:53.795208", + "step": 2132, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006038778927177191, + "timestamp": "2025-09-10 02:40:53.805772", + "step": 2133, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:53.858888", + "step": 2133, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00769368140026927, + "timestamp": "2025-09-10 02:40:53.861058", + "step": 2134, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:53.914948", + "step": 2134, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007579221855849028, + "timestamp": "2025-09-10 02:40:53.924541", + "step": 2135, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:40:53.991935", + "step": 2135, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005548670422285795, + "timestamp": "2025-09-10 02:40:54.004958", + "step": 2136, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:40:54.064983", + "step": 2136, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012776902876794338, + "timestamp": "2025-09-10 02:40:54.076805", + "step": 2137, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:54.129638", + "step": 2137, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010065835900604725, + "timestamp": "2025-09-10 02:40:54.131690", + "step": 2138, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:54.189670", + "step": 2138, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019276706501841545, + "timestamp": "2025-09-10 02:40:54.200133", + "step": 2139, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:54.253056", + "step": 2139, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012315203435719013, + "timestamp": "2025-09-10 02:40:54.258806", + "step": 2140, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:54.310812", + "step": 2140, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006040751468390226, + "timestamp": "2025-09-10 02:40:54.312837", + "step": 2141, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:54.365478", + "step": 2141, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005082997027784586, + "timestamp": "2025-09-10 02:40:54.372180", + "step": 2142, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:54.425890", + "step": 2142, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009807256050407887, + "timestamp": "2025-09-10 02:40:54.432437", + "step": 2143, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:54.490807", + "step": 2143, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003767900401726365, + "timestamp": "2025-09-10 02:40:54.502042", + "step": 2144, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:54.554992", + "step": 2144, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011249847710132599, + "timestamp": "2025-09-10 02:40:54.557268", + "step": 2145, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:54.612999", + "step": 2145, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008101708255708218, + "timestamp": "2025-09-10 02:40:54.622756", + "step": 2146, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:54.677208", + "step": 2146, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004199292976409197, + "timestamp": "2025-09-10 02:40:54.679656", + "step": 2147, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:54.733384", + "step": 2147, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011460386216640472, + "timestamp": "2025-09-10 02:40:54.739338", + "step": 2148, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:54.792358", + "step": 2148, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010788912884891033, + "timestamp": "2025-09-10 02:40:54.794656", + "step": 2149, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:40:54.846952", + "step": 2149, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04312171787023544, + "timestamp": "2025-09-10 02:40:54.849091", + "step": 2150, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:40:54.910876", + "step": 2150, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004354742355644703, + "timestamp": "2025-09-10 02:40:54.921988", + "step": 2151, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:54.974912", + "step": 2151, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008099909871816635, + "timestamp": "2025-09-10 02:40:54.980530", + "step": 2152, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:55.033326", + "step": 2152, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008201665244996548, + "timestamp": "2025-09-10 02:40:55.043798", + "step": 2153, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:55.096193", + "step": 2153, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005976626183837652, + "timestamp": "2025-09-10 02:40:55.099205", + "step": 2154, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:55.152582", + "step": 2154, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0046859607100486755, + "timestamp": "2025-09-10 02:40:55.154925", + "step": 2155, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:55.207884", + "step": 2155, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01543146837502718, + "timestamp": "2025-09-10 02:40:55.213688", + "step": 2156, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:40:55.273217", + "step": 2156, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007171375211328268, + "timestamp": "2025-09-10 02:40:55.285022", + "step": 2157, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:55.339587", + "step": 2157, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009496279060840607, + "timestamp": "2025-09-10 02:40:55.341636", + "step": 2158, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:55.393979", + "step": 2158, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014618291519582272, + "timestamp": "2025-09-10 02:40:55.396219", + "step": 2159, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:55.449356", + "step": 2159, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004825818818062544, + "timestamp": "2025-09-10 02:40:55.455153", + "step": 2160, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:40:55.507964", + "step": 2160, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009887455962598324, + "timestamp": "2025-09-10 02:40:55.518244", + "step": 2161, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:55.572029", + "step": 2161, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.061893828213214874, + "timestamp": "2025-09-10 02:40:55.574466", + "step": 2162, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:55.627299", + "step": 2162, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015684375539422035, + "timestamp": "2025-09-10 02:40:55.629443", + "step": 2163, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:40:55.690463", + "step": 2163, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014504256658256054, + "timestamp": "2025-09-10 02:40:55.702200", + "step": 2164, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:55.754233", + "step": 2164, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016790887340903282, + "timestamp": "2025-09-10 02:40:55.757285", + "step": 2165, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:55.809685", + "step": 2165, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0051002162508666515, + "timestamp": "2025-09-10 02:40:55.816382", + "step": 2166, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:55.869298", + "step": 2166, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0017120791599154472, + "timestamp": "2025-09-10 02:40:55.875886", + "step": 2167, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:55.929927", + "step": 2167, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002449542749673128, + "timestamp": "2025-09-10 02:40:55.936920", + "step": 2168, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:55.988903", + "step": 2168, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0005112888175062835, + "timestamp": "2025-09-10 02:40:55.991111", + "step": 2169, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:56.043516", + "step": 2169, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00936068408191204, + "timestamp": "2025-09-10 02:40:56.045789", + "step": 2170, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:56.098994", + "step": 2170, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004754960536956787, + "timestamp": "2025-09-10 02:40:56.105538", + "step": 2171, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:56.158525", + "step": 2171, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02954014204442501, + "timestamp": "2025-09-10 02:40:56.164206", + "step": 2172, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:56.217165", + "step": 2172, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026429308578372, + "timestamp": "2025-09-10 02:40:56.219295", + "step": 2173, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:56.272322", + "step": 2173, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002102070953696966, + "timestamp": "2025-09-10 02:40:56.274593", + "step": 2174, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:56.327767", + "step": 2174, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017273031175136566, + "timestamp": "2025-09-10 02:40:56.336133", + "step": 2175, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:56.391343", + "step": 2175, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03424067422747612, + "timestamp": "2025-09-10 02:40:56.401921", + "step": 2176, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:56.454836", + "step": 2176, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033699143677949905, + "timestamp": "2025-09-10 02:40:56.456892", + "step": 2177, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:56.509941", + "step": 2177, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013751120306551456, + "timestamp": "2025-09-10 02:40:56.512183", + "step": 2178, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:56.564976", + "step": 2178, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015615619719028473, + "timestamp": "2025-09-10 02:40:56.567043", + "step": 2179, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:40:56.624877", + "step": 2179, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020907536149024963, + "timestamp": "2025-09-10 02:40:56.636115", + "step": 2180, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:56.689074", + "step": 2180, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00806462112814188, + "timestamp": "2025-09-10 02:40:56.691226", + "step": 2181, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:56.744060", + "step": 2181, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006408077199012041, + "timestamp": "2025-09-10 02:40:56.747221", + "step": 2182, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:56.800294", + "step": 2182, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04697560518980026, + "timestamp": "2025-09-10 02:40:56.802424", + "step": 2183, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:40:56.855341", + "step": 2183, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03236076980829239, + "timestamp": "2025-09-10 02:40:56.862648", + "step": 2184, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:56.915124", + "step": 2184, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021621523424983025, + "timestamp": "2025-09-10 02:40:56.917396", + "step": 2185, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:56.969915", + "step": 2185, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009808598086237907, + "timestamp": "2025-09-10 02:40:56.972331", + "step": 2186, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:40:57.033878", + "step": 2186, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01728159561753273, + "timestamp": "2025-09-10 02:40:57.044793", + "step": 2187, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:57.097474", + "step": 2187, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021557852625846863, + "timestamp": "2025-09-10 02:40:57.103062", + "step": 2188, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:57.155242", + "step": 2188, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024152925238013268, + "timestamp": "2025-09-10 02:40:57.157486", + "step": 2189, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:57.210112", + "step": 2189, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013515089638531208, + "timestamp": "2025-09-10 02:40:57.213394", + "step": 2190, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:40:57.285934", + "step": 2190, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019097929820418358, + "timestamp": "2025-09-10 02:40:57.299450", + "step": 2191, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:57.352243", + "step": 2191, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00467953085899353, + "timestamp": "2025-09-10 02:40:57.357916", + "step": 2192, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:57.410380", + "step": 2192, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0023125980515033007, + "timestamp": "2025-09-10 02:40:57.412667", + "step": 2193, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:40:57.467289", + "step": 2193, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004761739633977413, + "timestamp": "2025-09-10 02:40:57.477059", + "step": 2194, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:40:57.530097", + "step": 2194, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0024532435927540064, + "timestamp": "2025-09-10 02:40:57.532371", + "step": 2195, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:40:57.585582", + "step": 2195, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010808519087731838, + "timestamp": "2025-09-10 02:40:57.594656", + "step": 2196, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:57.646716", + "step": 2196, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01729532890021801, + "timestamp": "2025-09-10 02:40:57.648816", + "step": 2197, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:40:57.700956", + "step": 2197, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00806347094476223, + "timestamp": "2025-09-10 02:40:57.702942", + "step": 2198, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:40:57.755078", + "step": 2198, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012199915014207363, + "timestamp": "2025-09-10 02:40:57.757256", + "step": 2199, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:57.809991", + "step": 2199, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006157182157039642, + "timestamp": "2025-09-10 02:40:57.815658", + "step": 2200, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:40:57.867886", + "step": 2200, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016080476343631744, + "timestamp": "2025-09-10 02:40:57.870082", + "step": 2201, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:40:57.922420", + "step": 2201, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014938508160412312, + "timestamp": "2025-09-10 02:40:57.925617", + "step": 2202, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:40:57.978098", + "step": 2202, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026552477851510048, + "timestamp": "2025-09-10 02:40:57.980221", + "step": 2203, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:40:58.032527", + "step": 2203, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020586589351296425, + "timestamp": "2025-09-10 02:40:58.038587", + "step": 2204, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:40:58.091055", + "step": 2204, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0026182897854596376, + "timestamp": "2025-09-10 02:40:58.093263", + "step": 2205, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:41:14.861620", + "step": 2205, + "epoch": 1 + }, + { + "type": "pplx", + "content": 24177451.08196949, + "timestamp": "2025-09-10 02:41:14.864180", + "step": 2205, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:14.918501", + "step": 2205, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02545582689344883, + "timestamp": "2025-09-10 02:41:14.925720", + "step": 2206, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:14.979257", + "step": 2206, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005285558756440878, + "timestamp": "2025-09-10 02:41:14.981182", + "step": 2207, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:15.034496", + "step": 2207, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018531398847699165, + "timestamp": "2025-09-10 02:41:15.041502", + "step": 2208, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:15.093854", + "step": 2208, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01486459281295538, + "timestamp": "2025-09-10 02:41:15.095964", + "step": 2209, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:15.149956", + "step": 2209, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00933418981730938, + "timestamp": "2025-09-10 02:41:15.152241", + "step": 2210, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:15.206019", + "step": 2210, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01151957642287016, + "timestamp": "2025-09-10 02:41:15.208363", + "step": 2211, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:15.261884", + "step": 2211, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006928191985934973, + "timestamp": "2025-09-10 02:41:15.268026", + "step": 2212, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:15.320551", + "step": 2212, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004967629909515381, + "timestamp": "2025-09-10 02:41:15.322497", + "step": 2213, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:15.375229", + "step": 2213, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0027123447507619858, + "timestamp": "2025-09-10 02:41:15.377353", + "step": 2214, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:15.430497", + "step": 2214, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027462434023618698, + "timestamp": "2025-09-10 02:41:15.438472", + "step": 2215, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:15.491111", + "step": 2215, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02530898153781891, + "timestamp": "2025-09-10 02:41:15.496994", + "step": 2216, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:15.549845", + "step": 2216, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029349375516176224, + "timestamp": "2025-09-10 02:41:15.552603", + "step": 2217, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:15.605607", + "step": 2217, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01407963689416647, + "timestamp": "2025-09-10 02:41:15.607514", + "step": 2218, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:15.660355", + "step": 2218, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014234866015613079, + "timestamp": "2025-09-10 02:41:15.662322", + "step": 2219, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:15.715788", + "step": 2219, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014596564695239067, + "timestamp": "2025-09-10 02:41:15.726161", + "step": 2220, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:15.782530", + "step": 2220, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01654386892914772, + "timestamp": "2025-09-10 02:41:15.785840", + "step": 2221, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:15.839424", + "step": 2221, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00550526799634099, + "timestamp": "2025-09-10 02:41:15.841382", + "step": 2222, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:15.895205", + "step": 2222, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002153964713215828, + "timestamp": "2025-09-10 02:41:15.897507", + "step": 2223, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:15.951316", + "step": 2223, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008730700239539146, + "timestamp": "2025-09-10 02:41:15.958134", + "step": 2224, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:16.010643", + "step": 2224, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006057410035282373, + "timestamp": "2025-09-10 02:41:16.017053", + "step": 2225, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:16.070410", + "step": 2225, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018048031255602837, + "timestamp": "2025-09-10 02:41:16.080056", + "step": 2226, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:16.133055", + "step": 2226, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009440673515200615, + "timestamp": "2025-09-10 02:41:16.135249", + "step": 2227, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:16.188960", + "step": 2227, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001368698780424893, + "timestamp": "2025-09-10 02:41:16.194938", + "step": 2228, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:41:16.247574", + "step": 2228, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014728554524481297, + "timestamp": "2025-09-10 02:41:16.249578", + "step": 2229, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:16.302032", + "step": 2229, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011189117096364498, + "timestamp": "2025-09-10 02:41:16.304251", + "step": 2230, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:41:16.377366", + "step": 2230, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016204727813601494, + "timestamp": "2025-09-10 02:41:16.391071", + "step": 2231, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:16.444465", + "step": 2231, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01046925038099289, + "timestamp": "2025-09-10 02:41:16.451792", + "step": 2232, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:16.504617", + "step": 2232, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012259201146662235, + "timestamp": "2025-09-10 02:41:16.507474", + "step": 2233, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:16.562099", + "step": 2233, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002176963957026601, + "timestamp": "2025-09-10 02:41:16.564093", + "step": 2234, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:16.617245", + "step": 2234, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011239242739975452, + "timestamp": "2025-09-10 02:41:16.619515", + "step": 2235, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:41:16.687523", + "step": 2235, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0023203168530017138, + "timestamp": "2025-09-10 02:41:16.700911", + "step": 2236, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:16.753839", + "step": 2236, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014164808671921492, + "timestamp": "2025-09-10 02:41:16.756042", + "step": 2237, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:16.808492", + "step": 2237, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022817770019173622, + "timestamp": "2025-09-10 02:41:16.810522", + "step": 2238, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:16.862688", + "step": 2238, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026868876069784164, + "timestamp": "2025-09-10 02:41:16.865808", + "step": 2239, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:16.919119", + "step": 2239, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017978010699152946, + "timestamp": "2025-09-10 02:41:16.927878", + "step": 2240, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:16.981530", + "step": 2240, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006993381772190332, + "timestamp": "2025-09-10 02:41:16.983675", + "step": 2241, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:41:17.042397", + "step": 2241, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012187718413770199, + "timestamp": "2025-09-10 02:41:17.052816", + "step": 2242, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:41:17.106811", + "step": 2242, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03987843915820122, + "timestamp": "2025-09-10 02:41:17.108917", + "step": 2243, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:17.163179", + "step": 2243, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01238629873842001, + "timestamp": "2025-09-10 02:41:17.170236", + "step": 2244, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:17.224617", + "step": 2244, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005961086135357618, + "timestamp": "2025-09-10 02:41:17.227023", + "step": 2245, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 9920060287936.0 + }, + "timestamp": "2025-09-10 02:41:17.302355", + "step": 2245, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01882362924516201, + "timestamp": "2025-09-10 02:41:17.316295", + "step": 2246, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:17.369839", + "step": 2246, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00610742811113596, + "timestamp": "2025-09-10 02:41:17.376321", + "step": 2247, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:17.429804", + "step": 2247, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004611399490386248, + "timestamp": "2025-09-10 02:41:17.437053", + "step": 2248, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:17.489471", + "step": 2248, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004369756672531366, + "timestamp": "2025-09-10 02:41:17.491843", + "step": 2249, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:41:17.544442", + "step": 2249, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019328685477375984, + "timestamp": "2025-09-10 02:41:17.546834", + "step": 2250, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:17.599326", + "step": 2250, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0016948387492448092, + "timestamp": "2025-09-10 02:41:17.602584", + "step": 2251, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:41:17.670376", + "step": 2251, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0033548062201589346, + "timestamp": "2025-09-10 02:41:17.683726", + "step": 2252, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:17.741857", + "step": 2252, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003852900117635727, + "timestamp": "2025-09-10 02:41:17.748685", + "step": 2253, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:17.808310", + "step": 2253, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0017933115595951676, + "timestamp": "2025-09-10 02:41:17.811884", + "step": 2254, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:17.869147", + "step": 2254, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010794803500175476, + "timestamp": "2025-09-10 02:41:17.873143", + "step": 2255, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:17.928256", + "step": 2255, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01731909066438675, + "timestamp": "2025-09-10 02:41:17.934184", + "step": 2256, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:41:17.995399", + "step": 2256, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01035644207149744, + "timestamp": "2025-09-10 02:41:18.007186", + "step": 2257, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:18.062789", + "step": 2257, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019283056259155273, + "timestamp": "2025-09-10 02:41:18.064940", + "step": 2258, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:18.120668", + "step": 2258, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010629468597471714, + "timestamp": "2025-09-10 02:41:18.123305", + "step": 2259, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:18.176668", + "step": 2259, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013493156991899014, + "timestamp": "2025-09-10 02:41:18.182843", + "step": 2260, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:18.236818", + "step": 2260, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011419164016842842, + "timestamp": "2025-09-10 02:41:18.243436", + "step": 2261, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:18.300132", + "step": 2261, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020291399210691452, + "timestamp": "2025-09-10 02:41:18.302517", + "step": 2262, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:41:18.375160", + "step": 2262, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003984835464507341, + "timestamp": "2025-09-10 02:41:18.387351", + "step": 2263, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:18.442897", + "step": 2263, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0030595448333770037, + "timestamp": "2025-09-10 02:41:18.449228", + "step": 2264, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:18.506257", + "step": 2264, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006595213431864977, + "timestamp": "2025-09-10 02:41:18.513844", + "step": 2265, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:41:18.578026", + "step": 2265, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022331099957227707, + "timestamp": "2025-09-10 02:41:18.588419", + "step": 2266, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:18.643790", + "step": 2266, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0034477750305086374, + "timestamp": "2025-09-10 02:41:18.653342", + "step": 2267, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:18.706215", + "step": 2267, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02623889409005642, + "timestamp": "2025-09-10 02:41:18.712194", + "step": 2268, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:18.764351", + "step": 2268, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026746975257992744, + "timestamp": "2025-09-10 02:41:18.774648", + "step": 2269, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:18.828138", + "step": 2269, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01220802403986454, + "timestamp": "2025-09-10 02:41:18.830280", + "step": 2270, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:41:18.888155", + "step": 2270, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0011245275381952524, + "timestamp": "2025-09-10 02:41:18.898608", + "step": 2271, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:41:18.953462", + "step": 2271, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003515868680551648, + "timestamp": "2025-09-10 02:41:18.964064", + "step": 2272, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:19.016398", + "step": 2272, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018965017050504684, + "timestamp": "2025-09-10 02:41:19.018443", + "step": 2273, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:41:19.071015", + "step": 2273, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0020753650460392237, + "timestamp": "2025-09-10 02:41:19.073122", + "step": 2274, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:41:19.125775", + "step": 2274, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002205053111538291, + "timestamp": "2025-09-10 02:41:19.127950", + "step": 2275, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:19.180845", + "step": 2275, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006280322093516588, + "timestamp": "2025-09-10 02:41:19.186799", + "step": 2276, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:19.239786", + "step": 2276, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013156109489500523, + "timestamp": "2025-09-10 02:41:19.241832", + "step": 2277, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:19.295259", + "step": 2277, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023661980405449867, + "timestamp": "2025-09-10 02:41:19.297267", + "step": 2278, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:19.350321", + "step": 2278, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0168520025908947, + "timestamp": "2025-09-10 02:41:19.352454", + "step": 2279, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:19.406007", + "step": 2279, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001595528912730515, + "timestamp": "2025-09-10 02:41:19.412906", + "step": 2280, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:19.465162", + "step": 2280, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002066161250695586, + "timestamp": "2025-09-10 02:41:19.467152", + "step": 2281, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:41:19.528441", + "step": 2281, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010385619476437569, + "timestamp": "2025-09-10 02:41:19.539534", + "step": 2282, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:41:19.600765", + "step": 2282, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009713256731629372, + "timestamp": "2025-09-10 02:41:19.611709", + "step": 2283, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:19.664566", + "step": 2283, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001635165186598897, + "timestamp": "2025-09-10 02:41:19.670128", + "step": 2284, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:41:19.729270", + "step": 2284, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004750643391162157, + "timestamp": "2025-09-10 02:41:19.741035", + "step": 2285, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:19.794319", + "step": 2285, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03609858825802803, + "timestamp": "2025-09-10 02:41:19.796548", + "step": 2286, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:19.849486", + "step": 2286, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013071305118501186, + "timestamp": "2025-09-10 02:41:19.852437", + "step": 2287, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:19.905906", + "step": 2287, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0013990921434015036, + "timestamp": "2025-09-10 02:41:19.911706", + "step": 2288, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:19.963843", + "step": 2288, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037052638828754425, + "timestamp": "2025-09-10 02:41:19.966099", + "step": 2289, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:20.019295", + "step": 2289, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020622368901968002, + "timestamp": "2025-09-10 02:41:20.021174", + "step": 2290, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:41:20.075321", + "step": 2290, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0023187126498669386, + "timestamp": "2025-09-10 02:41:20.085156", + "step": 2291, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:20.138387", + "step": 2291, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0026464611291885376, + "timestamp": "2025-09-10 02:41:20.144565", + "step": 2292, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:20.197490", + "step": 2292, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0021855831146240234, + "timestamp": "2025-09-10 02:41:20.199834", + "step": 2293, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:20.252235", + "step": 2293, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020060395821928978, + "timestamp": "2025-09-10 02:41:20.258773", + "step": 2294, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:20.311938", + "step": 2294, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002736916532739997, + "timestamp": "2025-09-10 02:41:20.318564", + "step": 2295, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:20.371618", + "step": 2295, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0022916479501873255, + "timestamp": "2025-09-10 02:41:20.377491", + "step": 2296, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:41:20.429420", + "step": 2296, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012775218114256859, + "timestamp": "2025-09-10 02:41:20.431779", + "step": 2297, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:20.484961", + "step": 2297, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024944672361016273, + "timestamp": "2025-09-10 02:41:20.487135", + "step": 2298, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:20.540107", + "step": 2298, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004370290320366621, + "timestamp": "2025-09-10 02:41:20.542171", + "step": 2299, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:20.595338", + "step": 2299, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006103006657212973, + "timestamp": "2025-09-10 02:41:20.604304", + "step": 2300, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:20.657144", + "step": 2300, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009504346176981926, + "timestamp": "2025-09-10 02:41:20.659242", + "step": 2301, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:20.712311", + "step": 2301, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0023260193411260843, + "timestamp": "2025-09-10 02:41:20.714627", + "step": 2302, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:20.768174", + "step": 2302, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003965396899729967, + "timestamp": "2025-09-10 02:41:20.776187", + "step": 2303, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:41:20.834491", + "step": 2303, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007237962447106838, + "timestamp": "2025-09-10 02:41:20.845731", + "step": 2304, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:20.898655", + "step": 2304, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013981464318931103, + "timestamp": "2025-09-10 02:41:20.900768", + "step": 2305, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:41:20.967264", + "step": 2305, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008471324108541012, + "timestamp": "2025-09-10 02:41:20.979513", + "step": 2306, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:21.032229", + "step": 2306, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007229648530483246, + "timestamp": "2025-09-10 02:41:21.035154", + "step": 2307, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:41:21.108651", + "step": 2307, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023853302001953125, + "timestamp": "2025-09-10 02:41:21.123137", + "step": 2308, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:21.175783", + "step": 2308, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012786195613443851, + "timestamp": "2025-09-10 02:41:21.178011", + "step": 2309, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:21.231153", + "step": 2309, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02114769257605076, + "timestamp": "2025-09-10 02:41:21.233243", + "step": 2310, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:21.286425", + "step": 2310, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04166872054338455, + "timestamp": "2025-09-10 02:41:21.288590", + "step": 2311, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:21.341390", + "step": 2311, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01378029864281416, + "timestamp": "2025-09-10 02:41:21.348605", + "step": 2312, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:21.401840", + "step": 2312, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00712332921102643, + "timestamp": "2025-09-10 02:41:21.403853", + "step": 2313, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:21.456243", + "step": 2313, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013273650780320168, + "timestamp": "2025-09-10 02:41:21.464496", + "step": 2314, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:21.517483", + "step": 2314, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012544920668005943, + "timestamp": "2025-09-10 02:41:21.524028", + "step": 2315, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:21.577238", + "step": 2315, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002789823804050684, + "timestamp": "2025-09-10 02:41:21.583022", + "step": 2316, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:41:21.635007", + "step": 2316, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005540736485272646, + "timestamp": "2025-09-10 02:41:21.637245", + "step": 2317, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:21.690185", + "step": 2317, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007694281172007322, + "timestamp": "2025-09-10 02:41:21.692272", + "step": 2318, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:41:21.746795", + "step": 2318, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013156984932720661, + "timestamp": "2025-09-10 02:41:21.756619", + "step": 2319, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:41:21.809346", + "step": 2319, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038832908030599356, + "timestamp": "2025-09-10 02:41:21.815060", + "step": 2320, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:21.867280", + "step": 2320, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027881622314453125, + "timestamp": "2025-09-10 02:41:21.869552", + "step": 2321, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:41:21.929849", + "step": 2321, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02795393206179142, + "timestamp": "2025-09-10 02:41:21.940547", + "step": 2322, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:41:22.000946", + "step": 2322, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004871138371527195, + "timestamp": "2025-09-10 02:41:22.011684", + "step": 2323, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:22.064313", + "step": 2323, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0025887552183121443, + "timestamp": "2025-09-10 02:41:22.070612", + "step": 2324, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:22.123054", + "step": 2324, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0193661879748106, + "timestamp": "2025-09-10 02:41:22.131332", + "step": 2325, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:22.184154", + "step": 2325, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001558991032652557, + "timestamp": "2025-09-10 02:41:22.190776", + "step": 2326, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:41:22.252546", + "step": 2326, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011013707146048546, + "timestamp": "2025-09-10 02:41:22.263674", + "step": 2327, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:41:22.336472", + "step": 2327, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006773136556148529, + "timestamp": "2025-09-10 02:41:22.350749", + "step": 2328, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:22.403496", + "step": 2328, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003129596123471856, + "timestamp": "2025-09-10 02:41:22.405834", + "step": 2329, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:22.458464", + "step": 2329, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017954537644982338, + "timestamp": "2025-09-10 02:41:22.460523", + "step": 2330, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:41:22.518551", + "step": 2330, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004090786445885897, + "timestamp": "2025-09-10 02:41:22.528966", + "step": 2331, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:22.585169", + "step": 2331, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008648094721138477, + "timestamp": "2025-09-10 02:41:22.592305", + "step": 2332, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:22.645362", + "step": 2332, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01296969223767519, + "timestamp": "2025-09-10 02:41:22.647523", + "step": 2333, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:22.701127", + "step": 2333, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012112529948353767, + "timestamp": "2025-09-10 02:41:22.710726", + "step": 2334, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:22.763657", + "step": 2334, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004430691245943308, + "timestamp": "2025-09-10 02:41:22.765892", + "step": 2335, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:22.819069", + "step": 2335, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01480043400079012, + "timestamp": "2025-09-10 02:41:22.824959", + "step": 2336, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:22.877693", + "step": 2336, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0011013038456439972, + "timestamp": "2025-09-10 02:41:22.879858", + "step": 2337, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:41:22.952336", + "step": 2337, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0402669832110405, + "timestamp": "2025-09-10 02:41:22.965808", + "step": 2338, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:23.019273", + "step": 2338, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002841313136741519, + "timestamp": "2025-09-10 02:41:23.021275", + "step": 2339, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:23.074502", + "step": 2339, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008241831324994564, + "timestamp": "2025-09-10 02:41:23.080247", + "step": 2340, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:41:23.136588", + "step": 2340, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03805210813879967, + "timestamp": "2025-09-10 02:41:23.147809", + "step": 2341, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 8960054460160.0 + }, + "timestamp": "2025-09-10 02:41:23.218593", + "step": 2341, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00663991691544652, + "timestamp": "2025-09-10 02:41:23.231512", + "step": 2342, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:23.285044", + "step": 2342, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011308408342301846, + "timestamp": "2025-09-10 02:41:23.287758", + "step": 2343, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:23.340550", + "step": 2343, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005613468121737242, + "timestamp": "2025-09-10 02:41:23.346532", + "step": 2344, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:23.398457", + "step": 2344, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019076507538557053, + "timestamp": "2025-09-10 02:41:23.401404", + "step": 2345, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:23.454352", + "step": 2345, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015190249308943748, + "timestamp": "2025-09-10 02:41:23.456783", + "step": 2346, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:23.509982", + "step": 2346, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03243137523531914, + "timestamp": "2025-09-10 02:41:23.512181", + "step": 2347, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:23.566203", + "step": 2347, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0007310754735954106, + "timestamp": "2025-09-10 02:41:23.575244", + "step": 2348, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:23.627876", + "step": 2348, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0011788978008553386, + "timestamp": "2025-09-10 02:41:23.629824", + "step": 2349, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:41:23.689973", + "step": 2349, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024757685139775276, + "timestamp": "2025-09-10 02:41:23.700602", + "step": 2350, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:23.754312", + "step": 2350, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0053327870555222034, + "timestamp": "2025-09-10 02:41:23.756596", + "step": 2351, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 13120079713856.0 + }, + "timestamp": "2025-09-10 02:41:23.852810", + "step": 2351, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020339807495474815, + "timestamp": "2025-09-10 02:41:23.872109", + "step": 2352, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:41:40.656522", + "step": 2352, + "epoch": 1 + }, + { + "type": "pplx", + "content": 27640302.91922767, + "timestamp": "2025-09-10 02:41:40.659223", + "step": 2352, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:40.712789", + "step": 2352, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013624520972371101, + "timestamp": "2025-09-10 02:41:40.717892", + "step": 2353, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:40.771968", + "step": 2353, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0037309457547962666, + "timestamp": "2025-09-10 02:41:40.774045", + "step": 2354, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:40.827033", + "step": 2354, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028440816327929497, + "timestamp": "2025-09-10 02:41:40.829076", + "step": 2355, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:41:40.908970", + "step": 2355, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002316842321306467, + "timestamp": "2025-09-10 02:41:40.924787", + "step": 2356, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 3, + 224 + ], + "flops": 3360020475552.0 + }, + "timestamp": "2025-09-10 02:41:40.994475", + "step": 2356, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00621901685371995, + "timestamp": "2025-09-10 02:41:40.996695", + "step": 2357, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:41.053972", + "step": 2357, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002619270933791995, + "timestamp": "2025-09-10 02:41:41.056175", + "step": 2358, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:41.109441", + "step": 2358, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00830973032861948, + "timestamp": "2025-09-10 02:41:41.112235", + "step": 2359, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:41.165134", + "step": 2359, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02099130116403103, + "timestamp": "2025-09-10 02:41:41.172128", + "step": 2360, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:41.225664", + "step": 2360, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021291126031428576, + "timestamp": "2025-09-10 02:41:41.227614", + "step": 2361, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:41.280742", + "step": 2361, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000321090774377808, + "timestamp": "2025-09-10 02:41:41.283067", + "step": 2362, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:41.335560", + "step": 2362, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012044147588312626, + "timestamp": "2025-09-10 02:41:41.337637", + "step": 2363, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:41.391314", + "step": 2363, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04186839610338211, + "timestamp": "2025-09-10 02:41:41.397142", + "step": 2364, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:41.449450", + "step": 2364, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03821462020277977, + "timestamp": "2025-09-10 02:41:41.451518", + "step": 2365, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:41.504028", + "step": 2365, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02326563559472561, + "timestamp": "2025-09-10 02:41:41.510517", + "step": 2366, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:41.562820", + "step": 2366, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005296733230352402, + "timestamp": "2025-09-10 02:41:41.565831", + "step": 2367, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:41.618470", + "step": 2367, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0141748683527112, + "timestamp": "2025-09-10 02:41:41.624272", + "step": 2368, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:41.676324", + "step": 2368, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03244634345173836, + "timestamp": "2025-09-10 02:41:41.678403", + "step": 2369, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:41.731091", + "step": 2369, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030230763368308544, + "timestamp": "2025-09-10 02:41:41.733172", + "step": 2370, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:41:41.790556", + "step": 2370, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023990808986127377, + "timestamp": "2025-09-10 02:41:41.801018", + "step": 2371, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:41.853887", + "step": 2371, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029439318459481, + "timestamp": "2025-09-10 02:41:41.859587", + "step": 2372, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:41.912190", + "step": 2372, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001223023165948689, + "timestamp": "2025-09-10 02:41:41.914393", + "step": 2373, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:41.967738", + "step": 2373, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022844061255455017, + "timestamp": "2025-09-10 02:41:41.977369", + "step": 2374, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:42.030311", + "step": 2374, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007408964913338423, + "timestamp": "2025-09-10 02:41:42.032352", + "step": 2375, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:42.085178", + "step": 2375, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005025565158575773, + "timestamp": "2025-09-10 02:41:42.092159", + "step": 2376, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:42.144032", + "step": 2376, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003703473135828972, + "timestamp": "2025-09-10 02:41:42.146175", + "step": 2377, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:42.199586", + "step": 2377, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023201072588562965, + "timestamp": "2025-09-10 02:41:42.205988", + "step": 2378, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:42.258629", + "step": 2378, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037703088019043207, + "timestamp": "2025-09-10 02:41:42.260669", + "step": 2379, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:42.313216", + "step": 2379, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029380829073488712, + "timestamp": "2025-09-10 02:41:42.319045", + "step": 2380, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:42.371988", + "step": 2380, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005905658472329378, + "timestamp": "2025-09-10 02:41:42.373911", + "step": 2381, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:42.426887", + "step": 2381, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012931009754538536, + "timestamp": "2025-09-10 02:41:42.428858", + "step": 2382, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:42.481751", + "step": 2382, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017085112631320953, + "timestamp": "2025-09-10 02:41:42.484046", + "step": 2383, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:42.538714", + "step": 2383, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009159311652183533, + "timestamp": "2025-09-10 02:41:42.544295", + "step": 2384, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:41:42.596530", + "step": 2384, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008803758770227432, + "timestamp": "2025-09-10 02:41:42.598603", + "step": 2385, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:42.652099", + "step": 2385, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.041297849267721176, + "timestamp": "2025-09-10 02:41:42.654294", + "step": 2386, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:41:42.714937", + "step": 2386, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009001196012832224, + "timestamp": "2025-09-10 02:41:42.725677", + "step": 2387, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:42.778458", + "step": 2387, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030867415480315685, + "timestamp": "2025-09-10 02:41:42.784303", + "step": 2388, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:42.836301", + "step": 2388, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014532825909554958, + "timestamp": "2025-09-10 02:41:42.838335", + "step": 2389, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:42.891475", + "step": 2389, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009635944850742817, + "timestamp": "2025-09-10 02:41:42.893800", + "step": 2390, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:42.947016", + "step": 2390, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018802549690008163, + "timestamp": "2025-09-10 02:41:42.949831", + "step": 2391, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:43.002760", + "step": 2391, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021239126101136208, + "timestamp": "2025-09-10 02:41:43.010007", + "step": 2392, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:41:43.061931", + "step": 2392, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011052722111344337, + "timestamp": "2025-09-10 02:41:43.064111", + "step": 2393, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:43.116832", + "step": 2393, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025772773660719395, + "timestamp": "2025-09-10 02:41:43.118920", + "step": 2394, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:41:43.171460", + "step": 2394, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00867217592895031, + "timestamp": "2025-09-10 02:41:43.173600", + "step": 2395, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:43.225982", + "step": 2395, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03286437317728996, + "timestamp": "2025-09-10 02:41:43.231653", + "step": 2396, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:43.283865", + "step": 2396, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016917405650019646, + "timestamp": "2025-09-10 02:41:43.285938", + "step": 2397, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:43.339107", + "step": 2397, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03374059125781059, + "timestamp": "2025-09-10 02:41:43.341465", + "step": 2398, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:43.395050", + "step": 2398, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008633555844426155, + "timestamp": "2025-09-10 02:41:43.397434", + "step": 2399, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:41:43.455902", + "step": 2399, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007943732663989067, + "timestamp": "2025-09-10 02:41:43.467117", + "step": 2400, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:43.519208", + "step": 2400, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014182468876242638, + "timestamp": "2025-09-10 02:41:43.521300", + "step": 2401, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:43.574009", + "step": 2401, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032707855105400085, + "timestamp": "2025-09-10 02:41:43.576269", + "step": 2402, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:43.628994", + "step": 2402, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0147025715559721, + "timestamp": "2025-09-10 02:41:43.631213", + "step": 2403, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:43.684120", + "step": 2403, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008943646214902401, + "timestamp": "2025-09-10 02:41:43.691273", + "step": 2404, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:43.743774", + "step": 2404, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003356833476573229, + "timestamp": "2025-09-10 02:41:43.745829", + "step": 2405, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:43.798891", + "step": 2405, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024517523124814034, + "timestamp": "2025-09-10 02:41:43.806763", + "step": 2406, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:43.860082", + "step": 2406, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030542414635419846, + "timestamp": "2025-09-10 02:41:43.868421", + "step": 2407, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:41:43.921062", + "step": 2407, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010237812995910645, + "timestamp": "2025-09-10 02:41:43.926768", + "step": 2408, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 928 + ], + "flops": 18560112737920.0 + }, + "timestamp": "2025-09-10 02:41:44.055465", + "step": 2408, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017811806872487068, + "timestamp": "2025-09-10 02:41:44.083804", + "step": 2409, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:44.137439", + "step": 2409, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00509228091686964, + "timestamp": "2025-09-10 02:41:44.139492", + "step": 2410, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:44.193111", + "step": 2410, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017014186829328537, + "timestamp": "2025-09-10 02:41:44.199142", + "step": 2411, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:44.252782", + "step": 2411, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01280141156166792, + "timestamp": "2025-09-10 02:41:44.259274", + "step": 2412, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:41:44.323682", + "step": 2412, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003944852855056524, + "timestamp": "2025-09-10 02:41:44.336973", + "step": 2413, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:41:44.390190", + "step": 2413, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00950128398835659, + "timestamp": "2025-09-10 02:41:44.392595", + "step": 2414, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:44.445675", + "step": 2414, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02840043418109417, + "timestamp": "2025-09-10 02:41:44.447783", + "step": 2415, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:44.500886", + "step": 2415, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006371225696057081, + "timestamp": "2025-09-10 02:41:44.507878", + "step": 2416, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:44.560264", + "step": 2416, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010210484266281128, + "timestamp": "2025-09-10 02:41:44.563063", + "step": 2417, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:44.616721", + "step": 2417, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0269516222178936, + "timestamp": "2025-09-10 02:41:44.626378", + "step": 2418, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:44.679939", + "step": 2418, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008304530754685402, + "timestamp": "2025-09-10 02:41:44.682262", + "step": 2419, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:44.734648", + "step": 2419, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00686608674004674, + "timestamp": "2025-09-10 02:41:44.740573", + "step": 2420, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:44.793275", + "step": 2420, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004633853677660227, + "timestamp": "2025-09-10 02:41:44.795885", + "step": 2421, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:41:44.864335", + "step": 2421, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009348112158477306, + "timestamp": "2025-09-10 02:41:44.876974", + "step": 2422, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:44.930440", + "step": 2422, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02112320438027382, + "timestamp": "2025-09-10 02:41:44.932456", + "step": 2423, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:44.985321", + "step": 2423, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006549620069563389, + "timestamp": "2025-09-10 02:41:44.991048", + "step": 2424, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:45.043448", + "step": 2424, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024886297062039375, + "timestamp": "2025-09-10 02:41:45.045826", + "step": 2425, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:45.099077", + "step": 2425, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008090948686003685, + "timestamp": "2025-09-10 02:41:45.101636", + "step": 2426, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:41:45.169496", + "step": 2426, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011296511627733707, + "timestamp": "2025-09-10 02:41:45.182073", + "step": 2427, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:45.236339", + "step": 2427, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005852424539625645, + "timestamp": "2025-09-10 02:41:45.245477", + "step": 2428, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:41:45.300019", + "step": 2428, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016052333638072014, + "timestamp": "2025-09-10 02:41:45.310485", + "step": 2429, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:45.365420", + "step": 2429, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015054935589432716, + "timestamp": "2025-09-10 02:41:45.368227", + "step": 2430, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:45.423205", + "step": 2430, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004304246511310339, + "timestamp": "2025-09-10 02:41:45.426311", + "step": 2431, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:45.479858", + "step": 2431, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027863934636116028, + "timestamp": "2025-09-10 02:41:45.488749", + "step": 2432, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:41:45.557116", + "step": 2432, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005833240691572428, + "timestamp": "2025-09-10 02:41:45.570344", + "step": 2433, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:45.624336", + "step": 2433, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0322459414601326, + "timestamp": "2025-09-10 02:41:45.626998", + "step": 2434, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:41:45.681501", + "step": 2434, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01197302620857954, + "timestamp": "2025-09-10 02:41:45.685217", + "step": 2435, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:41:45.749203", + "step": 2435, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010854208841919899, + "timestamp": "2025-09-10 02:41:45.760399", + "step": 2436, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:41:45.827042", + "step": 2436, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009456430561840534, + "timestamp": "2025-09-10 02:41:45.840292", + "step": 2437, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:45.896779", + "step": 2437, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012631854973733425, + "timestamp": "2025-09-10 02:41:45.899205", + "step": 2438, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:45.954025", + "step": 2438, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017019402235746384, + "timestamp": "2025-09-10 02:41:45.963695", + "step": 2439, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:46.017971", + "step": 2439, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02528921328485012, + "timestamp": "2025-09-10 02:41:46.025113", + "step": 2440, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:46.084778", + "step": 2440, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014773269183933735, + "timestamp": "2025-09-10 02:41:46.087295", + "step": 2441, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:46.140870", + "step": 2441, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019137965515255928, + "timestamp": "2025-09-10 02:41:46.144041", + "step": 2442, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:41:46.205957", + "step": 2442, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027888478711247444, + "timestamp": "2025-09-10 02:41:46.216748", + "step": 2443, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:46.270034", + "step": 2443, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008485652506351471, + "timestamp": "2025-09-10 02:41:46.276132", + "step": 2444, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:46.330371", + "step": 2444, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008633838966488838, + "timestamp": "2025-09-10 02:41:46.332915", + "step": 2445, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 9920060287936.0 + }, + "timestamp": "2025-09-10 02:41:46.408445", + "step": 2445, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006705735344439745, + "timestamp": "2025-09-10 02:41:46.422401", + "step": 2446, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:46.476260", + "step": 2446, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025132521986961365, + "timestamp": "2025-09-10 02:41:46.478611", + "step": 2447, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:46.532493", + "step": 2447, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007952259853482246, + "timestamp": "2025-09-10 02:41:46.538553", + "step": 2448, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:41:46.595229", + "step": 2448, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004040360916405916, + "timestamp": "2025-09-10 02:41:46.606443", + "step": 2449, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:46.660624", + "step": 2449, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030639899894595146, + "timestamp": "2025-09-10 02:41:46.662944", + "step": 2450, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:46.715888", + "step": 2450, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033033587969839573, + "timestamp": "2025-09-10 02:41:46.718118", + "step": 2451, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:41:46.792000", + "step": 2451, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015835467725992203, + "timestamp": "2025-09-10 02:41:46.806528", + "step": 2452, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:46.859315", + "step": 2452, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012064282782375813, + "timestamp": "2025-09-10 02:41:46.861602", + "step": 2453, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:46.914780", + "step": 2453, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005674016196280718, + "timestamp": "2025-09-10 02:41:46.917810", + "step": 2454, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:46.970676", + "step": 2454, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005164622329175472, + "timestamp": "2025-09-10 02:41:46.972847", + "step": 2455, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:47.025784", + "step": 2455, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008402018807828426, + "timestamp": "2025-09-10 02:41:47.031651", + "step": 2456, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:47.084786", + "step": 2456, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016407065093517303, + "timestamp": "2025-09-10 02:41:47.086842", + "step": 2457, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:41:47.139713", + "step": 2457, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01931409165263176, + "timestamp": "2025-09-10 02:41:47.141811", + "step": 2458, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:47.194991", + "step": 2458, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01934720017015934, + "timestamp": "2025-09-10 02:41:47.197490", + "step": 2459, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:47.250612", + "step": 2459, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023362424224615097, + "timestamp": "2025-09-10 02:41:47.256818", + "step": 2460, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:47.309839", + "step": 2460, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004369188565760851, + "timestamp": "2025-09-10 02:41:47.319862", + "step": 2461, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:47.373554", + "step": 2461, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018872682703658938, + "timestamp": "2025-09-10 02:41:47.381835", + "step": 2462, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:47.435820", + "step": 2462, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004272147547453642, + "timestamp": "2025-09-10 02:41:47.437829", + "step": 2463, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:47.491315", + "step": 2463, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022475240752100945, + "timestamp": "2025-09-10 02:41:47.497437", + "step": 2464, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:47.549730", + "step": 2464, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02900022082030773, + "timestamp": "2025-09-10 02:41:47.552718", + "step": 2465, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:47.605936", + "step": 2465, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004683454986661673, + "timestamp": "2025-09-10 02:41:47.612094", + "step": 2466, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:47.665686", + "step": 2466, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017793446313589811, + "timestamp": "2025-09-10 02:41:47.668028", + "step": 2467, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:41:47.721954", + "step": 2467, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003159657586365938, + "timestamp": "2025-09-10 02:41:47.732542", + "step": 2468, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:47.784826", + "step": 2468, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002170691965147853, + "timestamp": "2025-09-10 02:41:47.787099", + "step": 2469, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:47.839888", + "step": 2469, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007415976841002703, + "timestamp": "2025-09-10 02:41:47.842089", + "step": 2470, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:47.895689", + "step": 2470, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011888401582837105, + "timestamp": "2025-09-10 02:41:47.897946", + "step": 2471, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:47.951118", + "step": 2471, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013907036744058132, + "timestamp": "2025-09-10 02:41:47.957292", + "step": 2472, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:48.010767", + "step": 2472, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038232323713600636, + "timestamp": "2025-09-10 02:41:48.013091", + "step": 2473, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:41:48.071377", + "step": 2473, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011546431109309196, + "timestamp": "2025-09-10 02:41:48.081835", + "step": 2474, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:48.136415", + "step": 2474, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.049716461449861526, + "timestamp": "2025-09-10 02:41:48.138794", + "step": 2475, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:41:48.193331", + "step": 2475, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024496106430888176, + "timestamp": "2025-09-10 02:41:48.203932", + "step": 2476, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:41:48.260978", + "step": 2476, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0039366804994642735, + "timestamp": "2025-09-10 02:41:48.272209", + "step": 2477, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:48.327203", + "step": 2477, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01207022089511156, + "timestamp": "2025-09-10 02:41:48.336861", + "step": 2478, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:48.390695", + "step": 2478, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004953169729560614, + "timestamp": "2025-09-10 02:41:48.396560", + "step": 2479, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:48.449961", + "step": 2479, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016903908923268318, + "timestamp": "2025-09-10 02:41:48.456220", + "step": 2480, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:41:48.509904", + "step": 2480, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01183212548494339, + "timestamp": "2025-09-10 02:41:48.517085", + "step": 2481, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:41:48.570966", + "step": 2481, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014158886857330799, + "timestamp": "2025-09-10 02:41:48.573237", + "step": 2482, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:41:48.627000", + "step": 2482, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01064236555248499, + "timestamp": "2025-09-10 02:41:48.633111", + "step": 2483, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:41:48.695532", + "step": 2483, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020532239228487015, + "timestamp": "2025-09-10 02:41:48.707467", + "step": 2484, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:41:48.760291", + "step": 2484, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007942325435578823, + "timestamp": "2025-09-10 02:41:48.762784", + "step": 2485, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:41:48.818027", + "step": 2485, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003431000979617238, + "timestamp": "2025-09-10 02:41:48.827766", + "step": 2486, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:41:48.880865", + "step": 2486, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01795695349574089, + "timestamp": "2025-09-10 02:41:48.883333", + "step": 2487, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:48.936778", + "step": 2487, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010808142833411694, + "timestamp": "2025-09-10 02:41:48.943218", + "step": 2488, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:41:48.996068", + "step": 2488, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020689324010163546, + "timestamp": "2025-09-10 02:41:48.998663", + "step": 2489, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:49.052140", + "step": 2489, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010920782573521137, + "timestamp": "2025-09-10 02:41:49.054559", + "step": 2490, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:49.108083", + "step": 2490, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010443278588354588, + "timestamp": "2025-09-10 02:41:49.110707", + "step": 2491, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:49.165121", + "step": 2491, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003924945369362831, + "timestamp": "2025-09-10 02:41:49.175541", + "step": 2492, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:49.227703", + "step": 2492, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019283965229988098, + "timestamp": "2025-09-10 02:41:49.230014", + "step": 2493, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:41:49.283338", + "step": 2493, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004657456185668707, + "timestamp": "2025-09-10 02:41:49.292953", + "step": 2494, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:41:49.346085", + "step": 2494, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023488460574299097, + "timestamp": "2025-09-10 02:41:49.348389", + "step": 2495, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:41:49.401378", + "step": 2495, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0063967215828597546, + "timestamp": "2025-09-10 02:41:49.407362", + "step": 2496, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:41:49.459703", + "step": 2496, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004745048936456442, + "timestamp": "2025-09-10 02:41:49.462699", + "step": 2497, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:41:49.523014", + "step": 2497, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0107539938762784, + "timestamp": "2025-09-10 02:41:49.533782", + "step": 2498, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:41:49.587341", + "step": 2498, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009626333601772785, + "timestamp": "2025-09-10 02:41:49.589696", + "step": 2499, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:42:06.568005", + "step": 2499, + "epoch": 2 + }, + { + "type": "pplx", + "content": 25981030.787906833, + "timestamp": "2025-09-10 02:42:06.571091", + "step": 2499, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:06.626271", + "step": 2499, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008738330565392971, + "timestamp": "2025-09-10 02:42:06.632792", + "step": 2500, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 2500", + "timestamp": "2025-09-10 02:42:07.029243", + "step": 2500, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:07.087528", + "step": 2500, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007609358872286975, + "timestamp": "2025-09-10 02:42:07.090183", + "step": 2501, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:07.145221", + "step": 2501, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012053739046677947, + "timestamp": "2025-09-10 02:42:07.150177", + "step": 2502, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:07.204007", + "step": 2502, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017292501404881477, + "timestamp": "2025-09-10 02:42:07.206127", + "step": 2503, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:07.259753", + "step": 2503, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0072624897584319115, + "timestamp": "2025-09-10 02:42:07.266741", + "step": 2504, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:07.319575", + "step": 2504, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014371352270245552, + "timestamp": "2025-09-10 02:42:07.322468", + "step": 2505, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:07.376798", + "step": 2505, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016270782798528671, + "timestamp": "2025-09-10 02:42:07.379055", + "step": 2506, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:07.433164", + "step": 2506, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005980811547487974, + "timestamp": "2025-09-10 02:42:07.435434", + "step": 2507, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:42:07.496230", + "step": 2507, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02112610451877117, + "timestamp": "2025-09-10 02:42:07.507937", + "step": 2508, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:07.561007", + "step": 2508, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008170275948941708, + "timestamp": "2025-09-10 02:42:07.566993", + "step": 2509, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:42:07.620095", + "step": 2509, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03375102952122688, + "timestamp": "2025-09-10 02:42:07.622691", + "step": 2510, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:07.676206", + "step": 2510, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024797100573778152, + "timestamp": "2025-09-10 02:42:07.678422", + "step": 2511, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:07.732886", + "step": 2511, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027288347482681274, + "timestamp": "2025-09-10 02:42:07.743469", + "step": 2512, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:07.796086", + "step": 2512, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011557974852621555, + "timestamp": "2025-09-10 02:42:07.798565", + "step": 2513, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:42:07.851992", + "step": 2513, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006120680831372738, + "timestamp": "2025-09-10 02:42:07.854720", + "step": 2514, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:07.908900", + "step": 2514, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012789232656359673, + "timestamp": "2025-09-10 02:42:07.918502", + "step": 2515, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:07.973264", + "step": 2515, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013009892776608467, + "timestamp": "2025-09-10 02:42:07.983798", + "step": 2516, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:08.036492", + "step": 2516, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021685867104679346, + "timestamp": "2025-09-10 02:42:08.042245", + "step": 2517, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:08.097017", + "step": 2517, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0203882846981287, + "timestamp": "2025-09-10 02:42:08.099114", + "step": 2518, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:08.151885", + "step": 2518, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002050741109997034, + "timestamp": "2025-09-10 02:42:08.154344", + "step": 2519, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:08.207087", + "step": 2519, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02386748231947422, + "timestamp": "2025-09-10 02:42:08.213253", + "step": 2520, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:08.265795", + "step": 2520, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0099802166223526, + "timestamp": "2025-09-10 02:42:08.268151", + "step": 2521, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:08.326579", + "step": 2521, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0045924014411866665, + "timestamp": "2025-09-10 02:42:08.328849", + "step": 2522, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:08.384206", + "step": 2522, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01153036393225193, + "timestamp": "2025-09-10 02:42:08.389379", + "step": 2523, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:08.455327", + "step": 2523, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021937353536486626, + "timestamp": "2025-09-10 02:42:08.465687", + "step": 2524, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:42:08.524909", + "step": 2524, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004579412750899792, + "timestamp": "2025-09-10 02:42:08.536495", + "step": 2525, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:42:08.596939", + "step": 2525, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018722962588071823, + "timestamp": "2025-09-10 02:42:08.607382", + "step": 2526, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:08.661044", + "step": 2526, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006787048769183457, + "timestamp": "2025-09-10 02:42:08.663874", + "step": 2527, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:08.716954", + "step": 2527, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008177361451089382, + "timestamp": "2025-09-10 02:42:08.724654", + "step": 2528, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:08.784235", + "step": 2528, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024081855081021786, + "timestamp": "2025-09-10 02:42:08.794699", + "step": 2529, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:42:08.856625", + "step": 2529, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011337630450725555, + "timestamp": "2025-09-10 02:42:08.867399", + "step": 2530, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:08.920892", + "step": 2530, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02496512606739998, + "timestamp": "2025-09-10 02:42:08.923103", + "step": 2531, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:08.976383", + "step": 2531, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010318547487258911, + "timestamp": "2025-09-10 02:42:08.982674", + "step": 2532, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:09.042989", + "step": 2532, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02730054035782814, + "timestamp": "2025-09-10 02:42:09.054326", + "step": 2533, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:09.115072", + "step": 2533, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0053492337465286255, + "timestamp": "2025-09-10 02:42:09.117797", + "step": 2534, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 12160073886080.0 + }, + "timestamp": "2025-09-10 02:42:09.208932", + "step": 2534, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012704300694167614, + "timestamp": "2025-09-10 02:42:09.226015", + "step": 2535, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:09.287271", + "step": 2535, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004206423182040453, + "timestamp": "2025-09-10 02:42:09.297872", + "step": 2536, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:09.350842", + "step": 2536, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001664001145400107, + "timestamp": "2025-09-10 02:42:09.353139", + "step": 2537, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:09.409530", + "step": 2537, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025458360090851784, + "timestamp": "2025-09-10 02:42:09.411956", + "step": 2538, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:42:09.472858", + "step": 2538, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030657034367322922, + "timestamp": "2025-09-10 02:42:09.483515", + "step": 2539, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:42:09.547785", + "step": 2539, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008289070799946785, + "timestamp": "2025-09-10 02:42:09.559625", + "step": 2540, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:09.613178", + "step": 2540, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0224428940564394, + "timestamp": "2025-09-10 02:42:09.615659", + "step": 2541, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:09.670340", + "step": 2541, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030413294211030006, + "timestamp": "2025-09-10 02:42:09.672690", + "step": 2542, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:09.725701", + "step": 2542, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02619311399757862, + "timestamp": "2025-09-10 02:42:09.728021", + "step": 2543, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:09.786549", + "step": 2543, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00035007070982828736, + "timestamp": "2025-09-10 02:42:09.792692", + "step": 2544, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:09.846398", + "step": 2544, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011450978927314281, + "timestamp": "2025-09-10 02:42:09.852888", + "step": 2545, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:09.905693", + "step": 2545, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01046075951308012, + "timestamp": "2025-09-10 02:42:09.909774", + "step": 2546, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:09.973588", + "step": 2546, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021498361602425575, + "timestamp": "2025-09-10 02:42:09.976636", + "step": 2547, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:10.040432", + "step": 2547, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002972465241327882, + "timestamp": "2025-09-10 02:42:10.046819", + "step": 2548, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:42:10.107251", + "step": 2548, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012440596707165241, + "timestamp": "2025-09-10 02:42:10.119054", + "step": 2549, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:10.173417", + "step": 2549, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005210519302636385, + "timestamp": "2025-09-10 02:42:10.176425", + "step": 2550, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:10.229540", + "step": 2550, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003876955946907401, + "timestamp": "2025-09-10 02:42:10.234986", + "step": 2551, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:10.289517", + "step": 2551, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014093932695686817, + "timestamp": "2025-09-10 02:42:10.295645", + "step": 2552, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:10.357491", + "step": 2552, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04181818291544914, + "timestamp": "2025-09-10 02:42:10.361557", + "step": 2553, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:10.419717", + "step": 2553, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014616904780268669, + "timestamp": "2025-09-10 02:42:10.425888", + "step": 2554, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:10.481786", + "step": 2554, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007901540026068687, + "timestamp": "2025-09-10 02:42:10.484385", + "step": 2555, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:10.538021", + "step": 2555, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030624257400631905, + "timestamp": "2025-09-10 02:42:10.547433", + "step": 2556, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:10.601937", + "step": 2556, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014362893998622894, + "timestamp": "2025-09-10 02:42:10.610077", + "step": 2557, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:10.678890", + "step": 2557, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006508413353003561, + "timestamp": "2025-09-10 02:42:10.681354", + "step": 2558, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:10.734703", + "step": 2558, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011077557690441608, + "timestamp": "2025-09-10 02:42:10.737166", + "step": 2559, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:10.801708", + "step": 2559, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004156519193202257, + "timestamp": "2025-09-10 02:42:10.808071", + "step": 2560, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:10.863207", + "step": 2560, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007181708235293627, + "timestamp": "2025-09-10 02:42:10.869253", + "step": 2561, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:10.936305", + "step": 2561, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007056365138851106, + "timestamp": "2025-09-10 02:42:10.940350", + "step": 2562, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:10.997489", + "step": 2562, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005064928438514471, + "timestamp": "2025-09-10 02:42:10.999970", + "step": 2563, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:11.052778", + "step": 2563, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01664942502975464, + "timestamp": "2025-09-10 02:42:11.058984", + "step": 2564, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:11.117091", + "step": 2564, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005057492293417454, + "timestamp": "2025-09-10 02:42:11.126077", + "step": 2565, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:11.183873", + "step": 2565, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022189823910593987, + "timestamp": "2025-09-10 02:42:11.186537", + "step": 2566, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:11.243687", + "step": 2566, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026736078783869743, + "timestamp": "2025-09-10 02:42:11.246891", + "step": 2567, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:11.304166", + "step": 2567, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009926991537213326, + "timestamp": "2025-09-10 02:42:11.310299", + "step": 2568, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:11.372035", + "step": 2568, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00032137572998180985, + "timestamp": "2025-09-10 02:42:11.383674", + "step": 2569, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:11.440946", + "step": 2569, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036710728891193867, + "timestamp": "2025-09-10 02:42:11.451192", + "step": 2570, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:11.512540", + "step": 2570, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001445622299797833, + "timestamp": "2025-09-10 02:42:11.515330", + "step": 2571, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:11.570002", + "step": 2571, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015834486111998558, + "timestamp": "2025-09-10 02:42:11.576731", + "step": 2572, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:11.637773", + "step": 2572, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001168629853054881, + "timestamp": "2025-09-10 02:42:11.657947", + "step": 2573, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:11.714785", + "step": 2573, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004391381051391363, + "timestamp": "2025-09-10 02:42:11.719912", + "step": 2574, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:42:11.781613", + "step": 2574, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013341886922717094, + "timestamp": "2025-09-10 02:42:11.792033", + "step": 2575, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:11.846213", + "step": 2575, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005331250838935375, + "timestamp": "2025-09-10 02:42:11.857062", + "step": 2576, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:11.914076", + "step": 2576, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015829240903258324, + "timestamp": "2025-09-10 02:42:11.924564", + "step": 2577, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:42:11.988526", + "step": 2577, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023078806698322296, + "timestamp": "2025-09-10 02:42:11.999227", + "step": 2578, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:12.058106", + "step": 2578, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01056588999927044, + "timestamp": "2025-09-10 02:42:12.063308", + "step": 2579, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:12.123520", + "step": 2579, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007611948996782303, + "timestamp": "2025-09-10 02:42:12.131708", + "step": 2580, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:12.185445", + "step": 2580, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0280488021671772, + "timestamp": "2025-09-10 02:42:12.187941", + "step": 2581, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:42:12.276173", + "step": 2581, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011155783198773861, + "timestamp": "2025-09-10 02:42:12.291209", + "step": 2582, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:12.354422", + "step": 2582, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02470453456044197, + "timestamp": "2025-09-10 02:42:12.372665", + "step": 2583, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:12.442356", + "step": 2583, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014006450772285461, + "timestamp": "2025-09-10 02:42:12.451990", + "step": 2584, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:12.508523", + "step": 2584, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009627717547118664, + "timestamp": "2025-09-10 02:42:12.512268", + "step": 2585, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:42:12.579979", + "step": 2585, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020211519673466682, + "timestamp": "2025-09-10 02:42:12.590422", + "step": 2586, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:12.645995", + "step": 2586, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022182533517479897, + "timestamp": "2025-09-10 02:42:12.655792", + "step": 2587, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:42:12.724924", + "step": 2587, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0041777160950005054, + "timestamp": "2025-09-10 02:42:12.737917", + "step": 2588, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:12.794653", + "step": 2588, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015036248601973057, + "timestamp": "2025-09-10 02:42:12.799925", + "step": 2589, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:12.863312", + "step": 2589, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01882903091609478, + "timestamp": "2025-09-10 02:42:12.872651", + "step": 2590, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:12.927709", + "step": 2590, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014924319693818688, + "timestamp": "2025-09-10 02:42:12.936897", + "step": 2591, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:12.995248", + "step": 2591, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01842663064599037, + "timestamp": "2025-09-10 02:42:13.001482", + "step": 2592, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:13.055468", + "step": 2592, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0039041806012392044, + "timestamp": "2025-09-10 02:42:13.057678", + "step": 2593, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:13.112199", + "step": 2593, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02539738453924656, + "timestamp": "2025-09-10 02:42:13.115025", + "step": 2594, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:13.173590", + "step": 2594, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013453267747536302, + "timestamp": "2025-09-10 02:42:13.175773", + "step": 2595, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:13.228884", + "step": 2595, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006421868456527591, + "timestamp": "2025-09-10 02:42:13.234862", + "step": 2596, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:42:13.300119", + "step": 2596, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009963750839233398, + "timestamp": "2025-09-10 02:42:13.313350", + "step": 2597, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:13.366972", + "step": 2597, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004111562855541706, + "timestamp": "2025-09-10 02:42:13.369820", + "step": 2598, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:13.429167", + "step": 2598, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011045041494071484, + "timestamp": "2025-09-10 02:42:13.431065", + "step": 2599, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:13.490950", + "step": 2599, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011161820963025093, + "timestamp": "2025-09-10 02:42:13.499903", + "step": 2600, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:42:13.553457", + "step": 2600, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025803251191973686, + "timestamp": "2025-09-10 02:42:13.555982", + "step": 2601, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:13.610149", + "step": 2601, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0559576153755188, + "timestamp": "2025-09-10 02:42:13.612385", + "step": 2602, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:13.667786", + "step": 2602, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006970132235437632, + "timestamp": "2025-09-10 02:42:13.670042", + "step": 2603, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:13.725022", + "step": 2603, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007758519612252712, + "timestamp": "2025-09-10 02:42:13.731688", + "step": 2604, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:42:13.800640", + "step": 2604, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011060030199587345, + "timestamp": "2025-09-10 02:42:13.814266", + "step": 2605, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:13.867599", + "step": 2605, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.034862495958805084, + "timestamp": "2025-09-10 02:42:13.870103", + "step": 2606, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:42:13.928272", + "step": 2606, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031833648681640625, + "timestamp": "2025-09-10 02:42:13.938736", + "step": 2607, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:13.994791", + "step": 2607, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019451836124062538, + "timestamp": "2025-09-10 02:42:14.006616", + "step": 2608, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:14.065944", + "step": 2608, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014401193475350738, + "timestamp": "2025-09-10 02:42:14.076502", + "step": 2609, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:14.131836", + "step": 2609, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009075532434508204, + "timestamp": "2025-09-10 02:42:14.134143", + "step": 2610, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:14.189787", + "step": 2610, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01933993212878704, + "timestamp": "2025-09-10 02:42:14.193346", + "step": 2611, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:14.247275", + "step": 2611, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005016653332859278, + "timestamp": "2025-09-10 02:42:14.255144", + "step": 2612, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:14.315946", + "step": 2612, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005122971720993519, + "timestamp": "2025-09-10 02:42:14.318496", + "step": 2613, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:14.375474", + "step": 2613, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013108565472066402, + "timestamp": "2025-09-10 02:42:14.381339", + "step": 2614, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:14.436304", + "step": 2614, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009488807059824467, + "timestamp": "2025-09-10 02:42:14.443848", + "step": 2615, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:14.501883", + "step": 2615, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001307025202549994, + "timestamp": "2025-09-10 02:42:14.508215", + "step": 2616, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:14.566460", + "step": 2616, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008845310658216476, + "timestamp": "2025-09-10 02:42:14.573955", + "step": 2617, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:42:14.633049", + "step": 2617, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013522865250706673, + "timestamp": "2025-09-10 02:42:14.643474", + "step": 2618, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:14.702218", + "step": 2618, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.048304710537195206, + "timestamp": "2025-09-10 02:42:14.704420", + "step": 2619, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:42:14.758952", + "step": 2619, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02768402360379696, + "timestamp": "2025-09-10 02:42:14.764864", + "step": 2620, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:14.818335", + "step": 2620, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01532816793769598, + "timestamp": "2025-09-10 02:42:14.826127", + "step": 2621, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:14.881340", + "step": 2621, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010250316001474857, + "timestamp": "2025-09-10 02:42:14.884104", + "step": 2622, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:42:14.944719", + "step": 2622, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020182248204946518, + "timestamp": "2025-09-10 02:42:14.955117", + "step": 2623, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:15.009701", + "step": 2623, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02402585744857788, + "timestamp": "2025-09-10 02:42:15.015572", + "step": 2624, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:42:15.068303", + "step": 2624, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004449274856597185, + "timestamp": "2025-09-10 02:42:15.070215", + "step": 2625, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:15.123229", + "step": 2625, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011047663167119026, + "timestamp": "2025-09-10 02:42:15.126661", + "step": 2626, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:15.187472", + "step": 2626, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017207256751134992, + "timestamp": "2025-09-10 02:42:15.197297", + "step": 2627, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:15.250485", + "step": 2627, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0061110020615160465, + "timestamp": "2025-09-10 02:42:15.257519", + "step": 2628, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:15.310959", + "step": 2628, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01842239871621132, + "timestamp": "2025-09-10 02:42:15.313981", + "step": 2629, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:15.366946", + "step": 2629, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008514808490872383, + "timestamp": "2025-09-10 02:42:15.372055", + "step": 2630, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:15.427967", + "step": 2630, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007261297665536404, + "timestamp": "2025-09-10 02:42:15.429986", + "step": 2631, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:15.483107", + "step": 2631, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009131209808401763, + "timestamp": "2025-09-10 02:42:15.490035", + "step": 2632, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:15.545992", + "step": 2632, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014827689155936241, + "timestamp": "2025-09-10 02:42:15.548415", + "step": 2633, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:42:15.609305", + "step": 2633, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019348548725247383, + "timestamp": "2025-09-10 02:42:15.620054", + "step": 2634, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:15.674050", + "step": 2634, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006343338638544083, + "timestamp": "2025-09-10 02:42:15.677389", + "step": 2635, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:15.730967", + "step": 2635, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004739842377603054, + "timestamp": "2025-09-10 02:42:15.736942", + "step": 2636, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:15.789883", + "step": 2636, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0178393367677927, + "timestamp": "2025-09-10 02:42:15.792766", + "step": 2637, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:15.847380", + "step": 2637, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02419670857489109, + "timestamp": "2025-09-10 02:42:15.857004", + "step": 2638, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:42:15.911951", + "step": 2638, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009919347241520882, + "timestamp": "2025-09-10 02:42:15.915094", + "step": 2639, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:15.971119", + "step": 2639, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02844708040356636, + "timestamp": "2025-09-10 02:42:15.980829", + "step": 2640, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:16.035899", + "step": 2640, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01855628378689289, + "timestamp": "2025-09-10 02:42:16.038624", + "step": 2641, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:42:16.091865", + "step": 2641, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015538026578724384, + "timestamp": "2025-09-10 02:42:16.094395", + "step": 2642, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:16.147594", + "step": 2642, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01040005125105381, + "timestamp": "2025-09-10 02:42:16.150585", + "step": 2643, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:16.204385", + "step": 2643, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01307311374694109, + "timestamp": "2025-09-10 02:42:16.210564", + "step": 2644, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:16.264696", + "step": 2644, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003692588536068797, + "timestamp": "2025-09-10 02:42:16.266689", + "step": 2645, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:16.320118", + "step": 2645, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010986563749611378, + "timestamp": "2025-09-10 02:42:16.326751", + "step": 2646, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:42:33.251418", + "step": 2646, + "epoch": 2 + }, + { + "type": "pplx", + "content": 21589005.77936028, + "timestamp": "2025-09-10 02:42:33.255355", + "step": 2646, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:33.317311", + "step": 2646, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002613171236589551, + "timestamp": "2025-09-10 02:42:33.321516", + "step": 2647, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:33.396238", + "step": 2647, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004713746253401041, + "timestamp": "2025-09-10 02:42:33.405833", + "step": 2648, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:33.471657", + "step": 2648, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004867583978921175, + "timestamp": "2025-09-10 02:42:33.479520", + "step": 2649, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:33.539800", + "step": 2649, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01249657291918993, + "timestamp": "2025-09-10 02:42:33.545094", + "step": 2650, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:33.616243", + "step": 2650, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006686859764158726, + "timestamp": "2025-09-10 02:42:33.618738", + "step": 2651, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:42:33.690537", + "step": 2651, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002885418012738228, + "timestamp": "2025-09-10 02:42:33.702052", + "step": 2652, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:42:33.769061", + "step": 2652, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002413894282653928, + "timestamp": "2025-09-10 02:42:33.781093", + "step": 2653, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:33.838014", + "step": 2653, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009109397418797016, + "timestamp": "2025-09-10 02:42:33.840226", + "step": 2654, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:33.893817", + "step": 2654, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006080333609133959, + "timestamp": "2025-09-10 02:42:33.896266", + "step": 2655, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:42:33.963193", + "step": 2655, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030370343010872602, + "timestamp": "2025-09-10 02:42:33.976212", + "step": 2656, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:42:34.029410", + "step": 2656, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020097937434911728, + "timestamp": "2025-09-10 02:42:34.031403", + "step": 2657, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:34.084467", + "step": 2657, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011799024417996407, + "timestamp": "2025-09-10 02:42:34.086800", + "step": 2658, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:34.139900", + "step": 2658, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.056280989199876785, + "timestamp": "2025-09-10 02:42:34.146195", + "step": 2659, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:34.199236", + "step": 2659, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009348825551569462, + "timestamp": "2025-09-10 02:42:34.206494", + "step": 2660, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:34.259166", + "step": 2660, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003881684970110655, + "timestamp": "2025-09-10 02:42:34.261249", + "step": 2661, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:34.314263", + "step": 2661, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010265841148793697, + "timestamp": "2025-09-10 02:42:34.320976", + "step": 2662, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:34.374334", + "step": 2662, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018050475046038628, + "timestamp": "2025-09-10 02:42:34.382425", + "step": 2663, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:34.437345", + "step": 2663, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006232109852135181, + "timestamp": "2025-09-10 02:42:34.447928", + "step": 2664, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:34.500431", + "step": 2664, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008110949769616127, + "timestamp": "2025-09-10 02:42:34.503387", + "step": 2665, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:34.556326", + "step": 2665, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00312112458050251, + "timestamp": "2025-09-10 02:42:34.558528", + "step": 2666, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:34.611331", + "step": 2666, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009352847002446651, + "timestamp": "2025-09-10 02:42:34.613826", + "step": 2667, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:34.666788", + "step": 2667, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004420316778123379, + "timestamp": "2025-09-10 02:42:34.672342", + "step": 2668, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:42:34.732446", + "step": 2668, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024199034087359905, + "timestamp": "2025-09-10 02:42:34.744501", + "step": 2669, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:42:34.804893", + "step": 2669, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010698945261538029, + "timestamp": "2025-09-10 02:42:34.815579", + "step": 2670, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:34.870561", + "step": 2670, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01290913950651884, + "timestamp": "2025-09-10 02:42:34.880198", + "step": 2671, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:34.933156", + "step": 2671, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004819564521312714, + "timestamp": "2025-09-10 02:42:34.938921", + "step": 2672, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:34.991213", + "step": 2672, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025426404550671577, + "timestamp": "2025-09-10 02:42:34.993282", + "step": 2673, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:35.046705", + "step": 2673, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014216218842193484, + "timestamp": "2025-09-10 02:42:35.048943", + "step": 2674, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:35.102060", + "step": 2674, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002475936198607087, + "timestamp": "2025-09-10 02:42:35.110267", + "step": 2675, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:35.166947", + "step": 2675, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004885523580014706, + "timestamp": "2025-09-10 02:42:35.172495", + "step": 2676, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:35.224765", + "step": 2676, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006973663694225252, + "timestamp": "2025-09-10 02:42:35.231445", + "step": 2677, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:42:35.297944", + "step": 2677, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007346330676227808, + "timestamp": "2025-09-10 02:42:35.310174", + "step": 2678, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:35.363024", + "step": 2678, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001958871725946665, + "timestamp": "2025-09-10 02:42:35.366060", + "step": 2679, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:35.420796", + "step": 2679, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05266200378537178, + "timestamp": "2025-09-10 02:42:35.431379", + "step": 2680, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:35.484412", + "step": 2680, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008676085621118546, + "timestamp": "2025-09-10 02:42:35.486672", + "step": 2681, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:35.539661", + "step": 2681, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03698348626494408, + "timestamp": "2025-09-10 02:42:35.542778", + "step": 2682, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:35.595535", + "step": 2682, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012144326465204358, + "timestamp": "2025-09-10 02:42:35.597663", + "step": 2683, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:35.650352", + "step": 2683, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023621609434485435, + "timestamp": "2025-09-10 02:42:35.656289", + "step": 2684, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:35.709448", + "step": 2684, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025063061621040106, + "timestamp": "2025-09-10 02:42:35.715306", + "step": 2685, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:35.768595", + "step": 2685, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00428465660661459, + "timestamp": "2025-09-10 02:42:35.770786", + "step": 2686, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:35.823766", + "step": 2686, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007750555872917175, + "timestamp": "2025-09-10 02:42:35.825883", + "step": 2687, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:35.878688", + "step": 2687, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037932603154331446, + "timestamp": "2025-09-10 02:42:35.884619", + "step": 2688, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:35.937112", + "step": 2688, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010463619604706764, + "timestamp": "2025-09-10 02:42:35.943649", + "step": 2689, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:35.996983", + "step": 2689, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05804077908396721, + "timestamp": "2025-09-10 02:42:36.000003", + "step": 2690, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:36.053248", + "step": 2690, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007580592297017574, + "timestamp": "2025-09-10 02:42:36.055539", + "step": 2691, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:36.108885", + "step": 2691, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016909649595618248, + "timestamp": "2025-09-10 02:42:36.114773", + "step": 2692, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:36.166611", + "step": 2692, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001259945216588676, + "timestamp": "2025-09-10 02:42:36.168707", + "step": 2693, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:36.221381", + "step": 2693, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01103545818477869, + "timestamp": "2025-09-10 02:42:36.223497", + "step": 2694, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:36.276598", + "step": 2694, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001709669129922986, + "timestamp": "2025-09-10 02:42:36.278655", + "step": 2695, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:36.331402", + "step": 2695, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010664992965757847, + "timestamp": "2025-09-10 02:42:36.337416", + "step": 2696, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:36.390392", + "step": 2696, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004748235456645489, + "timestamp": "2025-09-10 02:42:36.392555", + "step": 2697, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:36.445956", + "step": 2697, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009418095462024212, + "timestamp": "2025-09-10 02:42:36.452519", + "step": 2698, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:42:36.505660", + "step": 2698, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035032329615205526, + "timestamp": "2025-09-10 02:42:36.508011", + "step": 2699, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:36.561291", + "step": 2699, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011930056847631931, + "timestamp": "2025-09-10 02:42:36.567198", + "step": 2700, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:36.620002", + "step": 2700, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012250921688973904, + "timestamp": "2025-09-10 02:42:36.622180", + "step": 2701, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:36.676573", + "step": 2701, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009608532302081585, + "timestamp": "2025-09-10 02:42:36.686131", + "step": 2702, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:36.739573", + "step": 2702, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023671496659517288, + "timestamp": "2025-09-10 02:42:36.741793", + "step": 2703, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:36.794709", + "step": 2703, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012722143437713385, + "timestamp": "2025-09-10 02:42:36.803624", + "step": 2704, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:36.856913", + "step": 2704, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010743381455540657, + "timestamp": "2025-09-10 02:42:36.859052", + "step": 2705, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:36.911906", + "step": 2705, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019930407404899597, + "timestamp": "2025-09-10 02:42:36.915550", + "step": 2706, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:36.970775", + "step": 2706, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002769148675724864, + "timestamp": "2025-09-10 02:42:36.972941", + "step": 2707, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:37.025892", + "step": 2707, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009284740313887596, + "timestamp": "2025-09-10 02:42:37.034839", + "step": 2708, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:37.087313", + "step": 2708, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016837477451190352, + "timestamp": "2025-09-10 02:42:37.089393", + "step": 2709, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:37.141829", + "step": 2709, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018571644322946668, + "timestamp": "2025-09-10 02:42:37.144144", + "step": 2710, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:37.196739", + "step": 2710, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022031908854842186, + "timestamp": "2025-09-10 02:42:37.199771", + "step": 2711, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:37.253667", + "step": 2711, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0071655805222690105, + "timestamp": "2025-09-10 02:42:37.264081", + "step": 2712, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:42:37.320944", + "step": 2712, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024215078447014093, + "timestamp": "2025-09-10 02:42:37.332179", + "step": 2713, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:37.387476", + "step": 2713, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002322185318917036, + "timestamp": "2025-09-10 02:42:37.395706", + "step": 2714, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 13120079713856.0 + }, + "timestamp": "2025-09-10 02:42:37.492031", + "step": 2714, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01126450952142477, + "timestamp": "2025-09-10 02:42:37.510566", + "step": 2715, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:37.564284", + "step": 2715, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001423872308805585, + "timestamp": "2025-09-10 02:42:37.570146", + "step": 2716, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:37.622656", + "step": 2716, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017503226408734918, + "timestamp": "2025-09-10 02:42:37.624563", + "step": 2717, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:37.677528", + "step": 2717, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006705854902975261, + "timestamp": "2025-09-10 02:42:37.680576", + "step": 2718, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:37.734212", + "step": 2718, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03989630937576294, + "timestamp": "2025-09-10 02:42:37.736145", + "step": 2719, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:37.789056", + "step": 2719, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011698446236550808, + "timestamp": "2025-09-10 02:42:37.794597", + "step": 2720, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:37.848021", + "step": 2720, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012054798426106572, + "timestamp": "2025-09-10 02:42:37.858529", + "step": 2721, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:37.911662", + "step": 2721, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011886332649737597, + "timestamp": "2025-09-10 02:42:37.914119", + "step": 2722, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:37.967450", + "step": 2722, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003914439585059881, + "timestamp": "2025-09-10 02:42:37.970396", + "step": 2723, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:38.024044", + "step": 2723, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028424093034118414, + "timestamp": "2025-09-10 02:42:38.029542", + "step": 2724, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:38.082991", + "step": 2724, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03507957234978676, + "timestamp": "2025-09-10 02:42:38.085239", + "step": 2725, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:38.139797", + "step": 2725, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009752371115610003, + "timestamp": "2025-09-10 02:42:38.141936", + "step": 2726, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:38.196679", + "step": 2726, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025734584778547287, + "timestamp": "2025-09-10 02:42:38.199191", + "step": 2727, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:38.252712", + "step": 2727, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001260034623555839, + "timestamp": "2025-09-10 02:42:38.258701", + "step": 2728, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:38.311909", + "step": 2728, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016585452249273658, + "timestamp": "2025-09-10 02:42:38.319833", + "step": 2729, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:38.372468", + "step": 2729, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003432965313550085, + "timestamp": "2025-09-10 02:42:38.374871", + "step": 2730, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:38.428192", + "step": 2730, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05336865410208702, + "timestamp": "2025-09-10 02:42:38.430322", + "step": 2731, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:38.485131", + "step": 2731, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027787829749286175, + "timestamp": "2025-09-10 02:42:38.491096", + "step": 2732, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:38.545041", + "step": 2732, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017570756375789642, + "timestamp": "2025-09-10 02:42:38.555526", + "step": 2733, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:42:38.630530", + "step": 2733, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010207007639110088, + "timestamp": "2025-09-10 02:42:38.644572", + "step": 2734, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:38.697695", + "step": 2734, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027272040024399757, + "timestamp": "2025-09-10 02:42:38.699974", + "step": 2735, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:38.752805", + "step": 2735, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002277930034324527, + "timestamp": "2025-09-10 02:42:38.758716", + "step": 2736, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:38.811435", + "step": 2736, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005421612877398729, + "timestamp": "2025-09-10 02:42:38.813791", + "step": 2737, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:38.868472", + "step": 2737, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007791418465785682, + "timestamp": "2025-09-10 02:42:38.870795", + "step": 2738, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:38.924295", + "step": 2738, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012010777136310935, + "timestamp": "2025-09-10 02:42:38.926487", + "step": 2739, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:38.980083", + "step": 2739, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03191911056637764, + "timestamp": "2025-09-10 02:42:38.986061", + "step": 2740, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:39.039049", + "step": 2740, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018431125208735466, + "timestamp": "2025-09-10 02:42:39.045412", + "step": 2741, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:42:39.103936", + "step": 2741, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011000008322298527, + "timestamp": "2025-09-10 02:42:39.114390", + "step": 2742, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:39.167744", + "step": 2742, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016186822205781937, + "timestamp": "2025-09-10 02:42:39.170221", + "step": 2743, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:39.223322", + "step": 2743, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029471402522176504, + "timestamp": "2025-09-10 02:42:39.229264", + "step": 2744, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:42:39.289150", + "step": 2744, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023999048862606287, + "timestamp": "2025-09-10 02:42:39.300740", + "step": 2745, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:39.353939", + "step": 2745, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022870872635394335, + "timestamp": "2025-09-10 02:42:39.356223", + "step": 2746, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:39.409578", + "step": 2746, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015325046144425869, + "timestamp": "2025-09-10 02:42:39.411779", + "step": 2747, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:39.465311", + "step": 2747, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005069035571068525, + "timestamp": "2025-09-10 02:42:39.474322", + "step": 2748, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:39.526720", + "step": 2748, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03320688381791115, + "timestamp": "2025-09-10 02:42:39.529539", + "step": 2749, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:39.585444", + "step": 2749, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04804794862866402, + "timestamp": "2025-09-10 02:42:39.590258", + "step": 2750, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:39.644796", + "step": 2750, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008649543859064579, + "timestamp": "2025-09-10 02:42:39.649829", + "step": 2751, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:39.704553", + "step": 2751, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009506465867161751, + "timestamp": "2025-09-10 02:42:39.711124", + "step": 2752, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:39.765100", + "step": 2752, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006533870473504066, + "timestamp": "2025-09-10 02:42:39.775425", + "step": 2753, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:39.829679", + "step": 2753, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00020588845654856414, + "timestamp": "2025-09-10 02:42:39.831895", + "step": 2754, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:39.886362", + "step": 2754, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023811091668903828, + "timestamp": "2025-09-10 02:42:39.888657", + "step": 2755, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:39.943347", + "step": 2755, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011465086601674557, + "timestamp": "2025-09-10 02:42:39.949635", + "step": 2756, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:40.003558", + "step": 2756, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006404041778296232, + "timestamp": "2025-09-10 02:42:40.005994", + "step": 2757, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:40.059263", + "step": 2757, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008716910146176815, + "timestamp": "2025-09-10 02:42:40.061377", + "step": 2758, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:40.115747", + "step": 2758, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008281629532575607, + "timestamp": "2025-09-10 02:42:40.121771", + "step": 2759, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:40.176569", + "step": 2759, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001604673219844699, + "timestamp": "2025-09-10 02:42:40.187189", + "step": 2760, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:40.239493", + "step": 2760, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006515085697174072, + "timestamp": "2025-09-10 02:42:40.241659", + "step": 2761, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:40.294362", + "step": 2761, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008057648316025734, + "timestamp": "2025-09-10 02:42:40.296538", + "step": 2762, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:40.349941", + "step": 2762, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005328737664967775, + "timestamp": "2025-09-10 02:42:40.352159", + "step": 2763, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:40.405354", + "step": 2763, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034779810812324286, + "timestamp": "2025-09-10 02:42:40.411107", + "step": 2764, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:40.463651", + "step": 2764, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028056337032467127, + "timestamp": "2025-09-10 02:42:40.473935", + "step": 2765, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:42:40.528692", + "step": 2765, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00816755834966898, + "timestamp": "2025-09-10 02:42:40.538491", + "step": 2766, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:40.591380", + "step": 2766, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0049702781252563, + "timestamp": "2025-09-10 02:42:40.593516", + "step": 2767, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:40.646966", + "step": 2767, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.041935887187719345, + "timestamp": "2025-09-10 02:42:40.652587", + "step": 2768, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:40.704843", + "step": 2768, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027195082511752844, + "timestamp": "2025-09-10 02:42:40.707043", + "step": 2769, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:40.760335", + "step": 2769, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028412478044629097, + "timestamp": "2025-09-10 02:42:40.762543", + "step": 2770, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:40.815461", + "step": 2770, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00922696478664875, + "timestamp": "2025-09-10 02:42:40.817892", + "step": 2771, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:40.871019", + "step": 2771, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022764507681131363, + "timestamp": "2025-09-10 02:42:40.876910", + "step": 2772, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:40.929034", + "step": 2772, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006302524823695421, + "timestamp": "2025-09-10 02:42:40.935838", + "step": 2773, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:40.988522", + "step": 2773, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01538578700274229, + "timestamp": "2025-09-10 02:42:40.995275", + "step": 2774, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:41.049281", + "step": 2774, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008590908721089363, + "timestamp": "2025-09-10 02:42:41.051591", + "step": 2775, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:41.105108", + "step": 2775, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027642270550131798, + "timestamp": "2025-09-10 02:42:41.112701", + "step": 2776, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:41.165405", + "step": 2776, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028574557974934578, + "timestamp": "2025-09-10 02:42:41.172240", + "step": 2777, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:41.225251", + "step": 2777, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030429674312472343, + "timestamp": "2025-09-10 02:42:41.233462", + "step": 2778, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:41.286597", + "step": 2778, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01037047989666462, + "timestamp": "2025-09-10 02:42:41.294779", + "step": 2779, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:41.348226", + "step": 2779, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00216799252666533, + "timestamp": "2025-09-10 02:42:41.355519", + "step": 2780, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:41.408134", + "step": 2780, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01218800712376833, + "timestamp": "2025-09-10 02:42:41.410863", + "step": 2781, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:41.463785", + "step": 2781, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006440795958042145, + "timestamp": "2025-09-10 02:42:41.466147", + "step": 2782, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:41.518848", + "step": 2782, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038501867093145847, + "timestamp": "2025-09-10 02:42:41.521100", + "step": 2783, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:42:41.574028", + "step": 2783, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0091011431068182, + "timestamp": "2025-09-10 02:42:41.580889", + "step": 2784, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:41.638278", + "step": 2784, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009202203713357449, + "timestamp": "2025-09-10 02:42:41.645142", + "step": 2785, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:42:41.702926", + "step": 2785, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011785599635913968, + "timestamp": "2025-09-10 02:42:41.711291", + "step": 2786, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:42:41.764352", + "step": 2786, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001529795117676258, + "timestamp": "2025-09-10 02:42:41.770960", + "step": 2787, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:41.826832", + "step": 2787, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0076119364239275455, + "timestamp": "2025-09-10 02:42:41.837184", + "step": 2788, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:42:41.897032", + "step": 2788, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010635657235980034, + "timestamp": "2025-09-10 02:42:41.899066", + "step": 2789, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:41.952188", + "step": 2789, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03798241168260574, + "timestamp": "2025-09-10 02:42:41.954297", + "step": 2790, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:42:42.012230", + "step": 2790, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027064664289355278, + "timestamp": "2025-09-10 02:42:42.014487", + "step": 2791, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:42.067891", + "step": 2791, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005027863197028637, + "timestamp": "2025-09-10 02:42:42.073778", + "step": 2792, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:42.136063", + "step": 2792, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008662876673042774, + "timestamp": "2025-09-10 02:42:42.138373", + "step": 2793, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:42:59.342017", + "step": 2793, + "epoch": 2 + }, + { + "type": "pplx", + "content": 19602792.91243142, + "timestamp": "2025-09-10 02:42:59.345053", + "step": 2793, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:42:59.400535", + "step": 2793, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018834838410839438, + "timestamp": "2025-09-10 02:42:59.407707", + "step": 2794, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:42:59.467168", + "step": 2794, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05520009994506836, + "timestamp": "2025-09-10 02:42:59.477603", + "step": 2795, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:42:59.531430", + "step": 2795, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007993536069989204, + "timestamp": "2025-09-10 02:42:59.537969", + "step": 2796, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:42:59.591473", + "step": 2796, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0039032420609146357, + "timestamp": "2025-09-10 02:42:59.594265", + "step": 2797, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:42:59.647649", + "step": 2797, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015108529478311539, + "timestamp": "2025-09-10 02:42:59.649799", + "step": 2798, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:59.703077", + "step": 2798, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0070057399570941925, + "timestamp": "2025-09-10 02:42:59.705393", + "step": 2799, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:59.760627", + "step": 2799, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003342447802424431, + "timestamp": "2025-09-10 02:42:59.766720", + "step": 2800, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:59.820615", + "step": 2800, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02086058259010315, + "timestamp": "2025-09-10 02:42:59.823304", + "step": 2801, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:42:59.886675", + "step": 2801, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03028605319559574, + "timestamp": "2025-09-10 02:42:59.897796", + "step": 2802, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:42:59.950926", + "step": 2802, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018823647871613503, + "timestamp": "2025-09-10 02:42:59.952971", + "step": 2803, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:00.006706", + "step": 2803, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008998965844511986, + "timestamp": "2025-09-10 02:43:00.012786", + "step": 2804, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:00.066314", + "step": 2804, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027237108442932367, + "timestamp": "2025-09-10 02:43:00.076813", + "step": 2805, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:00.130386", + "step": 2805, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01370945107191801, + "timestamp": "2025-09-10 02:43:00.138318", + "step": 2806, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:00.192275", + "step": 2806, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024837393313646317, + "timestamp": "2025-09-10 02:43:00.194319", + "step": 2807, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:00.247402", + "step": 2807, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018284594640135765, + "timestamp": "2025-09-10 02:43:00.253438", + "step": 2808, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:00.305915", + "step": 2808, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006133030168712139, + "timestamp": "2025-09-10 02:43:00.308178", + "step": 2809, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:00.361104", + "step": 2809, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009379004128277302, + "timestamp": "2025-09-10 02:43:00.363271", + "step": 2810, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:00.418289", + "step": 2810, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00395641615614295, + "timestamp": "2025-09-10 02:43:00.428126", + "step": 2811, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:00.481887", + "step": 2811, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022432571277022362, + "timestamp": "2025-09-10 02:43:00.489433", + "step": 2812, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:00.543822", + "step": 2812, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022870022803545, + "timestamp": "2025-09-10 02:43:00.546439", + "step": 2813, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:00.600026", + "step": 2813, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036130433436483145, + "timestamp": "2025-09-10 02:43:00.602956", + "step": 2814, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:00.656359", + "step": 2814, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028867374639958143, + "timestamp": "2025-09-10 02:43:00.658538", + "step": 2815, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:00.711526", + "step": 2815, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003171600867062807, + "timestamp": "2025-09-10 02:43:00.717833", + "step": 2816, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:00.770845", + "step": 2816, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01314304955303669, + "timestamp": "2025-09-10 02:43:00.773251", + "step": 2817, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:00.826399", + "step": 2817, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011470197699964046, + "timestamp": "2025-09-10 02:43:00.828940", + "step": 2818, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:00.882086", + "step": 2818, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009405607357621193, + "timestamp": "2025-09-10 02:43:00.885681", + "step": 2819, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:43:00.946119", + "step": 2819, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006938908249139786, + "timestamp": "2025-09-10 02:43:00.957331", + "step": 2820, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:01.013266", + "step": 2820, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03679962828755379, + "timestamp": "2025-09-10 02:43:01.023258", + "step": 2821, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:01.076607", + "step": 2821, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01181592233479023, + "timestamp": "2025-09-10 02:43:01.078852", + "step": 2822, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:01.132584", + "step": 2822, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005727309733629227, + "timestamp": "2025-09-10 02:43:01.134665", + "step": 2823, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:01.190903", + "step": 2823, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011616931296885014, + "timestamp": "2025-09-10 02:43:01.196676", + "step": 2824, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:01.254901", + "step": 2824, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011883147992193699, + "timestamp": "2025-09-10 02:43:01.263117", + "step": 2825, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:01.316817", + "step": 2825, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008945704437792301, + "timestamp": "2025-09-10 02:43:01.323132", + "step": 2826, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:01.379303", + "step": 2826, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015747204422950745, + "timestamp": "2025-09-10 02:43:01.381679", + "step": 2827, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:01.438407", + "step": 2827, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0066430093720555305, + "timestamp": "2025-09-10 02:43:01.445440", + "step": 2828, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:43:01.497663", + "step": 2828, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002757689217105508, + "timestamp": "2025-09-10 02:43:01.499936", + "step": 2829, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:01.554252", + "step": 2829, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01923009753227234, + "timestamp": "2025-09-10 02:43:01.556463", + "step": 2830, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:01.609841", + "step": 2830, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0152445612475276, + "timestamp": "2025-09-10 02:43:01.611921", + "step": 2831, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:01.664769", + "step": 2831, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026830332353711128, + "timestamp": "2025-09-10 02:43:01.670665", + "step": 2832, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:01.723139", + "step": 2832, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018680280074477196, + "timestamp": "2025-09-10 02:43:01.725242", + "step": 2833, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:01.777902", + "step": 2833, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007404194213449955, + "timestamp": "2025-09-10 02:43:01.780418", + "step": 2834, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:01.832885", + "step": 2834, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023370621725916862, + "timestamp": "2025-09-10 02:43:01.836092", + "step": 2835, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:01.889454", + "step": 2835, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015749046579003334, + "timestamp": "2025-09-10 02:43:01.897009", + "step": 2836, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:01.949050", + "step": 2836, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016447792295366526, + "timestamp": "2025-09-10 02:43:01.951174", + "step": 2837, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:02.004004", + "step": 2837, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0069174072705209255, + "timestamp": "2025-09-10 02:43:02.006141", + "step": 2838, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:02.058649", + "step": 2838, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037390643265098333, + "timestamp": "2025-09-10 02:43:02.062924", + "step": 2839, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:02.116690", + "step": 2839, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00888837967067957, + "timestamp": "2025-09-10 02:43:02.122647", + "step": 2840, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:02.174790", + "step": 2840, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004037219565361738, + "timestamp": "2025-09-10 02:43:02.181377", + "step": 2841, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:02.234740", + "step": 2841, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026061559095978737, + "timestamp": "2025-09-10 02:43:02.236837", + "step": 2842, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:43:02.303534", + "step": 2842, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011404995806515217, + "timestamp": "2025-09-10 02:43:02.315799", + "step": 2843, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:02.371015", + "step": 2843, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029660488944500685, + "timestamp": "2025-09-10 02:43:02.376829", + "step": 2844, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:02.431660", + "step": 2844, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.038797635585069656, + "timestamp": "2025-09-10 02:43:02.434272", + "step": 2845, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:02.491686", + "step": 2845, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024347158148884773, + "timestamp": "2025-09-10 02:43:02.494670", + "step": 2846, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:43:02.566152", + "step": 2846, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032168835401535034, + "timestamp": "2025-09-10 02:43:02.578822", + "step": 2847, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:02.633020", + "step": 2847, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007962683215737343, + "timestamp": "2025-09-10 02:43:02.640291", + "step": 2848, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:43:02.699124", + "step": 2848, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005333451088517904, + "timestamp": "2025-09-10 02:43:02.710673", + "step": 2849, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:02.764411", + "step": 2849, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006212006323039532, + "timestamp": "2025-09-10 02:43:02.767492", + "step": 2850, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:02.820589", + "step": 2850, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004719972610473633, + "timestamp": "2025-09-10 02:43:02.822852", + "step": 2851, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:02.875665", + "step": 2851, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008301042951643467, + "timestamp": "2025-09-10 02:43:02.881618", + "step": 2852, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:02.939941", + "step": 2852, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013020007871091366, + "timestamp": "2025-09-10 02:43:02.941777", + "step": 2853, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:43:02.993953", + "step": 2853, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010510066524147987, + "timestamp": "2025-09-10 02:43:02.996025", + "step": 2854, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:43:03.063923", + "step": 2854, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00926880817860365, + "timestamp": "2025-09-10 02:43:03.076390", + "step": 2855, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:43:03.129070", + "step": 2855, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003856593044474721, + "timestamp": "2025-09-10 02:43:03.135143", + "step": 2856, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:03.187409", + "step": 2856, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015365725383162498, + "timestamp": "2025-09-10 02:43:03.189891", + "step": 2857, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:03.243437", + "step": 2857, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004318633582442999, + "timestamp": "2025-09-10 02:43:03.245955", + "step": 2858, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:03.299576", + "step": 2858, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0041367243975400925, + "timestamp": "2025-09-10 02:43:03.305378", + "step": 2859, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:03.358983", + "step": 2859, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023787083104252815, + "timestamp": "2025-09-10 02:43:03.365194", + "step": 2860, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:03.418386", + "step": 2860, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018490174785256386, + "timestamp": "2025-09-10 02:43:03.424581", + "step": 2861, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:43:03.485934", + "step": 2861, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008858558721840382, + "timestamp": "2025-09-10 02:43:03.496844", + "step": 2862, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:03.550325", + "step": 2862, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026881933212280273, + "timestamp": "2025-09-10 02:43:03.553214", + "step": 2863, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:03.605954", + "step": 2863, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02366648241877556, + "timestamp": "2025-09-10 02:43:03.611823", + "step": 2864, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:03.663736", + "step": 2864, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005855969153344631, + "timestamp": "2025-09-10 02:43:03.666741", + "step": 2865, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:03.720641", + "step": 2865, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004256942309439182, + "timestamp": "2025-09-10 02:43:03.730228", + "step": 2866, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:03.784292", + "step": 2866, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015499325469136238, + "timestamp": "2025-09-10 02:43:03.794071", + "step": 2867, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:43:03.855182", + "step": 2867, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00483601214364171, + "timestamp": "2025-09-10 02:43:03.866868", + "step": 2868, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:03.919628", + "step": 2868, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022214515134692192, + "timestamp": "2025-09-10 02:43:03.926086", + "step": 2869, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:03.980371", + "step": 2869, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0195834469050169, + "timestamp": "2025-09-10 02:43:03.982799", + "step": 2870, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:43:04.050282", + "step": 2870, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019800430163741112, + "timestamp": "2025-09-10 02:43:04.062831", + "step": 2871, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:04.116345", + "step": 2871, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007477868348360062, + "timestamp": "2025-09-10 02:43:04.122178", + "step": 2872, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:43:04.181583", + "step": 2872, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007802496198564768, + "timestamp": "2025-09-10 02:43:04.193147", + "step": 2873, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:43:04.251165", + "step": 2873, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03706588223576546, + "timestamp": "2025-09-10 02:43:04.261599", + "step": 2874, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:04.314610", + "step": 2874, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0134931905195117, + "timestamp": "2025-09-10 02:43:04.316784", + "step": 2875, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:04.370048", + "step": 2875, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014370128512382507, + "timestamp": "2025-09-10 02:43:04.379148", + "step": 2876, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:04.431493", + "step": 2876, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015962282195687294, + "timestamp": "2025-09-10 02:43:04.433854", + "step": 2877, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:04.487114", + "step": 2877, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006657306104898453, + "timestamp": "2025-09-10 02:43:04.489333", + "step": 2878, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:04.542139", + "step": 2878, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003941268660128117, + "timestamp": "2025-09-10 02:43:04.544195", + "step": 2879, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:04.597904", + "step": 2879, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01750280149281025, + "timestamp": "2025-09-10 02:43:04.608294", + "step": 2880, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:04.660779", + "step": 2880, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005533973220735788, + "timestamp": "2025-09-10 02:43:04.663667", + "step": 2881, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:04.715937", + "step": 2881, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011847132816910744, + "timestamp": "2025-09-10 02:43:04.718035", + "step": 2882, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:04.770519", + "step": 2882, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017021920531988144, + "timestamp": "2025-09-10 02:43:04.773599", + "step": 2883, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:04.826530", + "step": 2883, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012416626326739788, + "timestamp": "2025-09-10 02:43:04.832148", + "step": 2884, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:04.884146", + "step": 2884, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01057855598628521, + "timestamp": "2025-09-10 02:43:04.890936", + "step": 2885, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:04.943546", + "step": 2885, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014162680599838495, + "timestamp": "2025-09-10 02:43:04.945553", + "step": 2886, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:04.997702", + "step": 2886, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018539367243647575, + "timestamp": "2025-09-10 02:43:05.000751", + "step": 2887, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:05.053755", + "step": 2887, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022198939695954323, + "timestamp": "2025-09-10 02:43:05.064139", + "step": 2888, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:05.115939", + "step": 2888, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011367526836693287, + "timestamp": "2025-09-10 02:43:05.119000", + "step": 2889, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:43:05.192528", + "step": 2889, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008567769080400467, + "timestamp": "2025-09-10 02:43:05.206219", + "step": 2890, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:05.258998", + "step": 2890, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026089565828442574, + "timestamp": "2025-09-10 02:43:05.261106", + "step": 2891, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:05.313952", + "step": 2891, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008742443285882473, + "timestamp": "2025-09-10 02:43:05.319796", + "step": 2892, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 688 + ], + "flops": 13760083599040.0 + }, + "timestamp": "2025-09-10 02:43:05.417134", + "step": 2892, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0068065267987549305, + "timestamp": "2025-09-10 02:43:05.438144", + "step": 2893, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:05.491057", + "step": 2893, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024250969290733337, + "timestamp": "2025-09-10 02:43:05.493166", + "step": 2894, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:05.545641", + "step": 2894, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006088267546147108, + "timestamp": "2025-09-10 02:43:05.548762", + "step": 2895, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:05.601537", + "step": 2895, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008796818554401398, + "timestamp": "2025-09-10 02:43:05.607140", + "step": 2896, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:05.659137", + "step": 2896, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007444328628480434, + "timestamp": "2025-09-10 02:43:05.661115", + "step": 2897, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:05.714345", + "step": 2897, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006030111573636532, + "timestamp": "2025-09-10 02:43:05.723961", + "step": 2898, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:05.776696", + "step": 2898, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022526364773511887, + "timestamp": "2025-09-10 02:43:05.779904", + "step": 2899, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:05.833221", + "step": 2899, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01635793223977089, + "timestamp": "2025-09-10 02:43:05.838829", + "step": 2900, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:43:05.903235", + "step": 2900, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01230801921337843, + "timestamp": "2025-09-10 02:43:05.916497", + "step": 2901, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:05.969267", + "step": 2901, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028225775808095932, + "timestamp": "2025-09-10 02:43:05.971563", + "step": 2902, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 560 + ], + "flops": 11200068058304.0 + }, + "timestamp": "2025-09-10 02:43:06.054100", + "step": 2902, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033608644735068083, + "timestamp": "2025-09-10 02:43:06.069544", + "step": 2903, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:06.122906", + "step": 2903, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027830079197883606, + "timestamp": "2025-09-10 02:43:06.128539", + "step": 2904, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:06.180541", + "step": 2904, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005439094267785549, + "timestamp": "2025-09-10 02:43:06.187374", + "step": 2905, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:06.240602", + "step": 2905, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01805257238447666, + "timestamp": "2025-09-10 02:43:06.242929", + "step": 2906, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:06.295456", + "step": 2906, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010302051901817322, + "timestamp": "2025-09-10 02:43:06.297715", + "step": 2907, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:06.351825", + "step": 2907, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005836172960698605, + "timestamp": "2025-09-10 02:43:06.362416", + "step": 2908, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:06.414883", + "step": 2908, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0068815648555755615, + "timestamp": "2025-09-10 02:43:06.425141", + "step": 2909, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:06.477885", + "step": 2909, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003785443725064397, + "timestamp": "2025-09-10 02:43:06.479909", + "step": 2910, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:06.533054", + "step": 2910, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021662486717104912, + "timestamp": "2025-09-10 02:43:06.542643", + "step": 2911, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:06.595728", + "step": 2911, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022201475221663713, + "timestamp": "2025-09-10 02:43:06.601527", + "step": 2912, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:06.654111", + "step": 2912, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036644372157752514, + "timestamp": "2025-09-10 02:43:06.656375", + "step": 2913, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:06.708778", + "step": 2913, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0043023936450481415, + "timestamp": "2025-09-10 02:43:06.710805", + "step": 2914, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:43:06.772647", + "step": 2914, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011631603119894862, + "timestamp": "2025-09-10 02:43:06.783744", + "step": 2915, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:06.836460", + "step": 2915, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015948059037327766, + "timestamp": "2025-09-10 02:43:06.842096", + "step": 2916, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:06.893801", + "step": 2916, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022618483752012253, + "timestamp": "2025-09-10 02:43:06.895957", + "step": 2917, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:06.948363", + "step": 2917, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023323001340031624, + "timestamp": "2025-09-10 02:43:06.950387", + "step": 2918, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:07.004055", + "step": 2918, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011524630710482597, + "timestamp": "2025-09-10 02:43:07.013672", + "step": 2919, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:43:07.074388", + "step": 2919, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015407305909320712, + "timestamp": "2025-09-10 02:43:07.086086", + "step": 2920, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:07.137786", + "step": 2920, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03542738035321236, + "timestamp": "2025-09-10 02:43:07.139892", + "step": 2921, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:07.192482", + "step": 2921, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011894494527950883, + "timestamp": "2025-09-10 02:43:07.194645", + "step": 2922, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:07.247252", + "step": 2922, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007411637343466282, + "timestamp": "2025-09-10 02:43:07.255381", + "step": 2923, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:07.308235", + "step": 2923, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002640027552843094, + "timestamp": "2025-09-10 02:43:07.313807", + "step": 2924, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:07.365289", + "step": 2924, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005221573170274496, + "timestamp": "2025-09-10 02:43:07.367536", + "step": 2925, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:07.419874", + "step": 2925, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010517584159970284, + "timestamp": "2025-09-10 02:43:07.422846", + "step": 2926, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:07.475887", + "step": 2926, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005084399599581957, + "timestamp": "2025-09-10 02:43:07.482402", + "step": 2927, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:07.534942", + "step": 2927, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02717072144150734, + "timestamp": "2025-09-10 02:43:07.540532", + "step": 2928, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:07.592832", + "step": 2928, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02197488769888878, + "timestamp": "2025-09-10 02:43:07.594982", + "step": 2929, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:43:07.667028", + "step": 2929, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008209539577364922, + "timestamp": "2025-09-10 02:43:07.680518", + "step": 2930, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:07.735665", + "step": 2930, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015388053841888905, + "timestamp": "2025-09-10 02:43:07.745495", + "step": 2931, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:07.798293", + "step": 2931, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02117818407714367, + "timestamp": "2025-09-10 02:43:07.804138", + "step": 2932, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:07.856059", + "step": 2932, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018351761624217033, + "timestamp": "2025-09-10 02:43:07.858291", + "step": 2933, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:07.911138", + "step": 2933, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005453685764223337, + "timestamp": "2025-09-10 02:43:07.919621", + "step": 2934, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:43:07.977902", + "step": 2934, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03870873898267746, + "timestamp": "2025-09-10 02:43:07.988274", + "step": 2935, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:08.041268", + "step": 2935, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004993564449250698, + "timestamp": "2025-09-10 02:43:08.046929", + "step": 2936, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:08.099289", + "step": 2936, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022162613458931446, + "timestamp": "2025-09-10 02:43:08.105883", + "step": 2937, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:08.158771", + "step": 2937, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011630414053797722, + "timestamp": "2025-09-10 02:43:08.162009", + "step": 2938, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:08.215042", + "step": 2938, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05587531253695488, + "timestamp": "2025-09-10 02:43:08.217140", + "step": 2939, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:08.269605", + "step": 2939, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02484690584242344, + "timestamp": "2025-09-10 02:43:08.275517", + "step": 2940, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:43:25.110434", + "step": 2940, + "epoch": 2 + }, + { + "type": "pplx", + "content": 21483309.118461803, + "timestamp": "2025-09-10 02:43:25.114056", + "step": 2940, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:25.167624", + "step": 2940, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008945746347308159, + "timestamp": "2025-09-10 02:43:25.176271", + "step": 2941, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:25.229778", + "step": 2941, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010575311025604606, + "timestamp": "2025-09-10 02:43:25.231925", + "step": 2942, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:25.285072", + "step": 2942, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017733285203576088, + "timestamp": "2025-09-10 02:43:25.292965", + "step": 2943, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:25.345799", + "step": 2943, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020000610500574112, + "timestamp": "2025-09-10 02:43:25.351792", + "step": 2944, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:25.404334", + "step": 2944, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038425668608397245, + "timestamp": "2025-09-10 02:43:25.407181", + "step": 2945, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 816 + ], + "flops": 16320099139776.0 + }, + "timestamp": "2025-09-10 02:43:25.525936", + "step": 2945, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013542547821998596, + "timestamp": "2025-09-10 02:43:25.548897", + "step": 2946, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:25.602309", + "step": 2946, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012076723389327526, + "timestamp": "2025-09-10 02:43:25.605144", + "step": 2947, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:25.657775", + "step": 2947, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017612642841413617, + "timestamp": "2025-09-10 02:43:25.665260", + "step": 2948, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:25.717887", + "step": 2948, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016427170485258102, + "timestamp": "2025-09-10 02:43:25.727924", + "step": 2949, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:25.781393", + "step": 2949, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007146573625504971, + "timestamp": "2025-09-10 02:43:25.783612", + "step": 2950, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:25.836724", + "step": 2950, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00922436360269785, + "timestamp": "2025-09-10 02:43:25.839532", + "step": 2951, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:25.892469", + "step": 2951, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013483921065926552, + "timestamp": "2025-09-10 02:43:25.898357", + "step": 2952, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:43:25.958858", + "step": 2952, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007663012947887182, + "timestamp": "2025-09-10 02:43:25.970632", + "step": 2953, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:26.023987", + "step": 2953, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013566250912845135, + "timestamp": "2025-09-10 02:43:26.026185", + "step": 2954, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:26.079225", + "step": 2954, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026658134534955025, + "timestamp": "2025-09-10 02:43:26.081360", + "step": 2955, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:26.133781", + "step": 2955, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004819025285542011, + "timestamp": "2025-09-10 02:43:26.139451", + "step": 2956, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:26.192040", + "step": 2956, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018547430634498596, + "timestamp": "2025-09-10 02:43:26.193957", + "step": 2957, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:26.247436", + "step": 2957, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003229897003620863, + "timestamp": "2025-09-10 02:43:26.249411", + "step": 2958, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:26.302574", + "step": 2958, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017530253157019615, + "timestamp": "2025-09-10 02:43:26.304676", + "step": 2959, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:26.357863", + "step": 2959, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028641356620937586, + "timestamp": "2025-09-10 02:43:26.368201", + "step": 2960, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:26.420948", + "step": 2960, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02891319803893566, + "timestamp": "2025-09-10 02:43:26.431366", + "step": 2961, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:43:26.492639", + "step": 2961, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020257892087101936, + "timestamp": "2025-09-10 02:43:26.503518", + "step": 2962, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:26.556643", + "step": 2962, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009795842925086617, + "timestamp": "2025-09-10 02:43:26.558940", + "step": 2963, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:26.611897", + "step": 2963, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008834127336740494, + "timestamp": "2025-09-10 02:43:26.620767", + "step": 2964, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:26.673497", + "step": 2964, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00043440479203127325, + "timestamp": "2025-09-10 02:43:26.675972", + "step": 2965, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:26.728566", + "step": 2965, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021795372013002634, + "timestamp": "2025-09-10 02:43:26.730892", + "step": 2966, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:26.784336", + "step": 2966, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014383898815140128, + "timestamp": "2025-09-10 02:43:26.786683", + "step": 2967, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:26.839298", + "step": 2967, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009421980008482933, + "timestamp": "2025-09-10 02:43:26.845409", + "step": 2968, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:26.897998", + "step": 2968, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011221504770219326, + "timestamp": "2025-09-10 02:43:26.904561", + "step": 2969, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:43:26.962882", + "step": 2969, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034413428511470556, + "timestamp": "2025-09-10 02:43:26.973354", + "step": 2970, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:27.026661", + "step": 2970, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001014295150525868, + "timestamp": "2025-09-10 02:43:27.036297", + "step": 2971, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:27.089437", + "step": 2971, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013005608692765236, + "timestamp": "2025-09-10 02:43:27.095241", + "step": 2972, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:43:27.153946", + "step": 2972, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037272910121828318, + "timestamp": "2025-09-10 02:43:27.165460", + "step": 2973, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:27.218882", + "step": 2973, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01542600803077221, + "timestamp": "2025-09-10 02:43:27.221114", + "step": 2974, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:27.274092", + "step": 2974, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008530435152351856, + "timestamp": "2025-09-10 02:43:27.276210", + "step": 2975, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:27.328501", + "step": 2975, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025617094710469246, + "timestamp": "2025-09-10 02:43:27.334339", + "step": 2976, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:27.386669", + "step": 2976, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015901281731203198, + "timestamp": "2025-09-10 02:43:27.393268", + "step": 2977, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:27.446658", + "step": 2977, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009493803605437279, + "timestamp": "2025-09-10 02:43:27.454912", + "step": 2978, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:27.508045", + "step": 2978, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005454585188999772, + "timestamp": "2025-09-10 02:43:27.510304", + "step": 2979, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:27.562864", + "step": 2979, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009057528339326382, + "timestamp": "2025-09-10 02:43:27.568763", + "step": 2980, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:43:27.620268", + "step": 2980, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001526238163933158, + "timestamp": "2025-09-10 02:43:27.622443", + "step": 2981, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:27.674860", + "step": 2981, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.034560464322566986, + "timestamp": "2025-09-10 02:43:27.677102", + "step": 2982, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:27.729589", + "step": 2982, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003290567547082901, + "timestamp": "2025-09-10 02:43:27.731812", + "step": 2983, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:27.784023", + "step": 2983, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036186235956847668, + "timestamp": "2025-09-10 02:43:27.789747", + "step": 2984, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:27.841717", + "step": 2984, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00348859466612339, + "timestamp": "2025-09-10 02:43:27.848233", + "step": 2985, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:27.900963", + "step": 2985, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003103638533502817, + "timestamp": "2025-09-10 02:43:27.903494", + "step": 2986, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:27.957033", + "step": 2986, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010334305465221405, + "timestamp": "2025-09-10 02:43:27.966688", + "step": 2987, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:28.019562", + "step": 2987, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009181870147585869, + "timestamp": "2025-09-10 02:43:28.025110", + "step": 2988, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:28.077095", + "step": 2988, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002298856619745493, + "timestamp": "2025-09-10 02:43:28.080139", + "step": 2989, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:28.132786", + "step": 2989, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030902244616299868, + "timestamp": "2025-09-10 02:43:28.135066", + "step": 2990, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:28.187694", + "step": 2990, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016290295869112015, + "timestamp": "2025-09-10 02:43:28.189886", + "step": 2991, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:28.242487", + "step": 2991, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031161842867732048, + "timestamp": "2025-09-10 02:43:28.248035", + "step": 2992, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:28.300029", + "step": 2992, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004563372116535902, + "timestamp": "2025-09-10 02:43:28.310251", + "step": 2993, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:28.363348", + "step": 2993, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020457947626709938, + "timestamp": "2025-09-10 02:43:28.365532", + "step": 2994, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:28.418500", + "step": 2994, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016411827877163887, + "timestamp": "2025-09-10 02:43:28.420537", + "step": 2995, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:28.473257", + "step": 2995, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003593911649659276, + "timestamp": "2025-09-10 02:43:28.479031", + "step": 2996, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:28.532369", + "step": 2996, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003384404582902789, + "timestamp": "2025-09-10 02:43:28.542849", + "step": 2997, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:28.597887", + "step": 2997, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026975279673933983, + "timestamp": "2025-09-10 02:43:28.607653", + "step": 2998, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:28.661022", + "step": 2998, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019041141495108604, + "timestamp": "2025-09-10 02:43:28.663132", + "step": 2999, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:28.716219", + "step": 2999, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01516516599804163, + "timestamp": "2025-09-10 02:43:28.722091", + "step": 3000, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 3000", + "timestamp": "2025-09-10 02:43:29.135530", + "step": 3000, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:29.190119", + "step": 3000, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021581441164016724, + "timestamp": "2025-09-10 02:43:29.197170", + "step": 3001, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:29.250756", + "step": 3001, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012467102147638798, + "timestamp": "2025-09-10 02:43:29.252711", + "step": 3002, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:29.305695", + "step": 3002, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00976636353880167, + "timestamp": "2025-09-10 02:43:29.308062", + "step": 3003, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:29.360893", + "step": 3003, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009900376200675964, + "timestamp": "2025-09-10 02:43:29.366900", + "step": 3004, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:29.419313", + "step": 3004, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017990581691265106, + "timestamp": "2025-09-10 02:43:29.422209", + "step": 3005, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:29.474942", + "step": 3005, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001193814561702311, + "timestamp": "2025-09-10 02:43:29.477148", + "step": 3006, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:43:29.543270", + "step": 3006, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005569125525653362, + "timestamp": "2025-09-10 02:43:29.555493", + "step": 3007, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:29.608379", + "step": 3007, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012687196722254157, + "timestamp": "2025-09-10 02:43:29.614183", + "step": 3008, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:29.666281", + "step": 3008, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02738204412162304, + "timestamp": "2025-09-10 02:43:29.669218", + "step": 3009, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:43:29.730388", + "step": 3009, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017042160034179688, + "timestamp": "2025-09-10 02:43:29.741349", + "step": 3010, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:29.794784", + "step": 3010, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024572748690843582, + "timestamp": "2025-09-10 02:43:29.797083", + "step": 3011, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:29.849592", + "step": 3011, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01295087393373251, + "timestamp": "2025-09-10 02:43:29.856873", + "step": 3012, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:29.910152", + "step": 3012, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017828369745984674, + "timestamp": "2025-09-10 02:43:29.919335", + "step": 3013, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:29.973569", + "step": 3013, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021940140053629875, + "timestamp": "2025-09-10 02:43:29.983341", + "step": 3014, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:30.036946", + "step": 3014, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029555542394518852, + "timestamp": "2025-09-10 02:43:30.040337", + "step": 3015, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:30.093302", + "step": 3015, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0069861263036727905, + "timestamp": "2025-09-10 02:43:30.099309", + "step": 3016, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:30.151226", + "step": 3016, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03783723711967468, + "timestamp": "2025-09-10 02:43:30.154246", + "step": 3017, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:30.212382", + "step": 3017, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014050469733774662, + "timestamp": "2025-09-10 02:43:30.214522", + "step": 3018, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:30.267609", + "step": 3018, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017659427830949426, + "timestamp": "2025-09-10 02:43:30.269914", + "step": 3019, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:30.323089", + "step": 3019, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009280859492719173, + "timestamp": "2025-09-10 02:43:30.329020", + "step": 3020, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:30.393798", + "step": 3020, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00999439973384142, + "timestamp": "2025-09-10 02:43:30.403774", + "step": 3021, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:30.458967", + "step": 3021, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006523417541757226, + "timestamp": "2025-09-10 02:43:30.464946", + "step": 3022, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:30.521783", + "step": 3022, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010792514309287071, + "timestamp": "2025-09-10 02:43:30.523855", + "step": 3023, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:30.578078", + "step": 3023, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.042817894369363785, + "timestamp": "2025-09-10 02:43:30.586583", + "step": 3024, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:30.644695", + "step": 3024, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024986821226775646, + "timestamp": "2025-09-10 02:43:30.646978", + "step": 3025, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:30.699817", + "step": 3025, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008581001311540604, + "timestamp": "2025-09-10 02:43:30.702097", + "step": 3026, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:30.754664", + "step": 3026, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.033836524933576584, + "timestamp": "2025-09-10 02:43:30.758201", + "step": 3027, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:30.812040", + "step": 3027, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003424307331442833, + "timestamp": "2025-09-10 02:43:30.825697", + "step": 3028, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:30.879359", + "step": 3028, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014254000270739198, + "timestamp": "2025-09-10 02:43:30.885922", + "step": 3029, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:30.940311", + "step": 3029, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016321172937750816, + "timestamp": "2025-09-10 02:43:30.942351", + "step": 3030, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:43:31.011323", + "step": 3030, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.046935852617025375, + "timestamp": "2025-09-10 02:43:31.024048", + "step": 3031, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:31.079782", + "step": 3031, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012795096263289452, + "timestamp": "2025-09-10 02:43:31.085585", + "step": 3032, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:31.142970", + "step": 3032, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0052894712425768375, + "timestamp": "2025-09-10 02:43:31.144960", + "step": 3033, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:31.207978", + "step": 3033, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034933737479150295, + "timestamp": "2025-09-10 02:43:31.212348", + "step": 3034, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:31.268244", + "step": 3034, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024392152205109596, + "timestamp": "2025-09-10 02:43:31.276394", + "step": 3035, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:31.332729", + "step": 3035, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007851941511034966, + "timestamp": "2025-09-10 02:43:31.338655", + "step": 3036, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:43:31.398572", + "step": 3036, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009740768000483513, + "timestamp": "2025-09-10 02:43:31.410102", + "step": 3037, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:31.462894", + "step": 3037, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0092580895870924, + "timestamp": "2025-09-10 02:43:31.465050", + "step": 3038, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:31.519301", + "step": 3038, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00213812873698771, + "timestamp": "2025-09-10 02:43:31.522890", + "step": 3039, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:31.577935", + "step": 3039, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020063860341906548, + "timestamp": "2025-09-10 02:43:31.583987", + "step": 3040, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:31.636011", + "step": 3040, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006031975150108337, + "timestamp": "2025-09-10 02:43:31.638253", + "step": 3041, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:43:31.704842", + "step": 3041, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018207212910056114, + "timestamp": "2025-09-10 02:43:31.717075", + "step": 3042, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:31.776423", + "step": 3042, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016082150861620903, + "timestamp": "2025-09-10 02:43:31.780024", + "step": 3043, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:31.834293", + "step": 3043, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013052371330559254, + "timestamp": "2025-09-10 02:43:31.840338", + "step": 3044, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:31.900986", + "step": 3044, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016128752380609512, + "timestamp": "2025-09-10 02:43:31.903404", + "step": 3045, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:31.956565", + "step": 3045, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01358273159712553, + "timestamp": "2025-09-10 02:43:31.958910", + "step": 3046, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:32.012499", + "step": 3046, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029387916438281536, + "timestamp": "2025-09-10 02:43:32.022110", + "step": 3047, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:32.076197", + "step": 3047, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009799973107874393, + "timestamp": "2025-09-10 02:43:32.082147", + "step": 3048, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:32.134403", + "step": 3048, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005152401048690081, + "timestamp": "2025-09-10 02:43:32.136778", + "step": 3049, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:32.189458", + "step": 3049, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01178047340363264, + "timestamp": "2025-09-10 02:43:32.191898", + "step": 3050, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:32.244657", + "step": 3050, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021244531963020563, + "timestamp": "2025-09-10 02:43:32.247452", + "step": 3051, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:32.300595", + "step": 3051, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004618776496499777, + "timestamp": "2025-09-10 02:43:32.307720", + "step": 3052, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:43:32.375034", + "step": 3052, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014385617338120937, + "timestamp": "2025-09-10 02:43:32.388783", + "step": 3053, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:32.441947", + "step": 3053, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005455045960843563, + "timestamp": "2025-09-10 02:43:32.449845", + "step": 3054, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:32.503765", + "step": 3054, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028096141759306192, + "timestamp": "2025-09-10 02:43:32.505934", + "step": 3055, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:32.558732", + "step": 3055, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01378645095974207, + "timestamp": "2025-09-10 02:43:32.564709", + "step": 3056, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:32.616970", + "step": 3056, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026725469157099724, + "timestamp": "2025-09-10 02:43:32.619260", + "step": 3057, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:32.674191", + "step": 3057, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002781761111691594, + "timestamp": "2025-09-10 02:43:32.683932", + "step": 3058, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:32.737790", + "step": 3058, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00698810676112771, + "timestamp": "2025-09-10 02:43:32.739839", + "step": 3059, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:32.793527", + "step": 3059, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01064991019666195, + "timestamp": "2025-09-10 02:43:32.801733", + "step": 3060, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:32.854959", + "step": 3060, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022440778091549873, + "timestamp": "2025-09-10 02:43:32.857261", + "step": 3061, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:32.912231", + "step": 3061, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007607771549373865, + "timestamp": "2025-09-10 02:43:32.922054", + "step": 3062, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:32.975055", + "step": 3062, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037695816718041897, + "timestamp": "2025-09-10 02:43:32.977337", + "step": 3063, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:33.038566", + "step": 3063, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025553121231496334, + "timestamp": "2025-09-10 02:43:33.044242", + "step": 3064, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:43:33.100173", + "step": 3064, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0130363954231143, + "timestamp": "2025-09-10 02:43:33.111409", + "step": 3065, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:33.164383", + "step": 3065, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009316898882389069, + "timestamp": "2025-09-10 02:43:33.166577", + "step": 3066, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:33.219644", + "step": 3066, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015726543962955475, + "timestamp": "2025-09-10 02:43:33.221895", + "step": 3067, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:43:33.279597", + "step": 3067, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011039117351174355, + "timestamp": "2025-09-10 02:43:33.290770", + "step": 3068, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:43:33.343898", + "step": 3068, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018892407417297363, + "timestamp": "2025-09-10 02:43:33.346167", + "step": 3069, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:33.398989", + "step": 3069, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01331985741853714, + "timestamp": "2025-09-10 02:43:33.405118", + "step": 3070, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:33.458787", + "step": 3070, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00435601407662034, + "timestamp": "2025-09-10 02:43:33.461021", + "step": 3071, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:33.513781", + "step": 3071, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037311671767383814, + "timestamp": "2025-09-10 02:43:33.519614", + "step": 3072, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:33.571982", + "step": 3072, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012653851881623268, + "timestamp": "2025-09-10 02:43:33.578301", + "step": 3073, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:43:33.639183", + "step": 3073, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001527549116872251, + "timestamp": "2025-09-10 02:43:33.649892", + "step": 3074, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:33.702982", + "step": 3074, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00562877906486392, + "timestamp": "2025-09-10 02:43:33.711100", + "step": 3075, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:43:33.777943", + "step": 3075, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016573479399085045, + "timestamp": "2025-09-10 02:43:33.790945", + "step": 3076, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:33.843771", + "step": 3076, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036496755201369524, + "timestamp": "2025-09-10 02:43:33.852265", + "step": 3077, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:33.905328", + "step": 3077, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029678826685994864, + "timestamp": "2025-09-10 02:43:33.907408", + "step": 3078, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:33.960337", + "step": 3078, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011269161477684975, + "timestamp": "2025-09-10 02:43:33.968487", + "step": 3079, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:34.021507", + "step": 3079, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005516494624316692, + "timestamp": "2025-09-10 02:43:34.027567", + "step": 3080, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:34.080162", + "step": 3080, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007762841880321503, + "timestamp": "2025-09-10 02:43:34.082418", + "step": 3081, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:34.136858", + "step": 3081, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00736891059204936, + "timestamp": "2025-09-10 02:43:34.139384", + "step": 3082, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:34.193981", + "step": 3082, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037586067337542772, + "timestamp": "2025-09-10 02:43:34.196395", + "step": 3083, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:34.249866", + "step": 3083, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010520155541598797, + "timestamp": "2025-09-10 02:43:34.255946", + "step": 3084, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:34.308338", + "step": 3084, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014333308674395084, + "timestamp": "2025-09-10 02:43:34.310443", + "step": 3085, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:43:34.378589", + "step": 3085, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008122967556118965, + "timestamp": "2025-09-10 02:43:34.391169", + "step": 3086, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:34.444024", + "step": 3086, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008164488710463047, + "timestamp": "2025-09-10 02:43:34.445951", + "step": 3087, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:43:51.268366", + "step": 3087, + "epoch": 2 + }, + { + "type": "pplx", + "content": 19060472.29738327, + "timestamp": "2025-09-10 02:43:51.271269", + "step": 3087, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:51.326149", + "step": 3087, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016778473509475589, + "timestamp": "2025-09-10 02:43:51.335114", + "step": 3088, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:51.391894", + "step": 3088, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00943154189735651, + "timestamp": "2025-09-10 02:43:51.393896", + "step": 3089, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:51.447082", + "step": 3089, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010285828029736876, + "timestamp": "2025-09-10 02:43:51.454967", + "step": 3090, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:51.508167", + "step": 3090, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013816392049193382, + "timestamp": "2025-09-10 02:43:51.510348", + "step": 3091, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:51.563358", + "step": 3091, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017100904136896133, + "timestamp": "2025-09-10 02:43:51.569068", + "step": 3092, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:43:51.625468", + "step": 3092, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035547781735658646, + "timestamp": "2025-09-10 02:43:51.636656", + "step": 3093, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:43:51.703242", + "step": 3093, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028612406458705664, + "timestamp": "2025-09-10 02:43:51.715475", + "step": 3094, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:51.768669", + "step": 3094, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025799472350627184, + "timestamp": "2025-09-10 02:43:51.770680", + "step": 3095, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:51.823582", + "step": 3095, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003510661656036973, + "timestamp": "2025-09-10 02:43:51.829206", + "step": 3096, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:43:51.880893", + "step": 3096, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002644149586558342, + "timestamp": "2025-09-10 02:43:51.882846", + "step": 3097, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:51.935425", + "step": 3097, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014366241171956062, + "timestamp": "2025-09-10 02:43:51.937587", + "step": 3098, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:51.990155", + "step": 3098, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009584962390363216, + "timestamp": "2025-09-10 02:43:51.992352", + "step": 3099, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:52.045106", + "step": 3099, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015968400985002518, + "timestamp": "2025-09-10 02:43:52.050702", + "step": 3100, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:52.102968", + "step": 3100, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021843912079930305, + "timestamp": "2025-09-10 02:43:52.105129", + "step": 3101, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:52.157611", + "step": 3101, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001562813064083457, + "timestamp": "2025-09-10 02:43:52.159682", + "step": 3102, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:52.214144", + "step": 3102, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011524799279868603, + "timestamp": "2025-09-10 02:43:52.223953", + "step": 3103, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:52.277894", + "step": 3103, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034488060045987368, + "timestamp": "2025-09-10 02:43:52.283749", + "step": 3104, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:52.336350", + "step": 3104, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026885902509093285, + "timestamp": "2025-09-10 02:43:52.343105", + "step": 3105, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:52.396339", + "step": 3105, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009301887825131416, + "timestamp": "2025-09-10 02:43:52.398554", + "step": 3106, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:43:52.456646", + "step": 3106, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008182553574442863, + "timestamp": "2025-09-10 02:43:52.467083", + "step": 3107, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:52.520631", + "step": 3107, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007265047635883093, + "timestamp": "2025-09-10 02:43:52.526317", + "step": 3108, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:52.578635", + "step": 3108, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005355027038604021, + "timestamp": "2025-09-10 02:43:52.580901", + "step": 3109, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:52.637902", + "step": 3109, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009155571460723877, + "timestamp": "2025-09-10 02:43:52.646178", + "step": 3110, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:52.699468", + "step": 3110, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002473545726388693, + "timestamp": "2025-09-10 02:43:52.707732", + "step": 3111, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:52.761110", + "step": 3111, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006904462352395058, + "timestamp": "2025-09-10 02:43:52.767917", + "step": 3112, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:52.820750", + "step": 3112, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008953842334449291, + "timestamp": "2025-09-10 02:43:52.822737", + "step": 3113, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:52.875172", + "step": 3113, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006417619530111551, + "timestamp": "2025-09-10 02:43:52.877364", + "step": 3114, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:43:52.950713", + "step": 3114, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0418584868311882, + "timestamp": "2025-09-10 02:43:52.964456", + "step": 3115, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:53.018738", + "step": 3115, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030400361865758896, + "timestamp": "2025-09-10 02:43:53.029346", + "step": 3116, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:53.081389", + "step": 3116, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008601049892604351, + "timestamp": "2025-09-10 02:43:53.083440", + "step": 3117, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:53.136333", + "step": 3117, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009945330210030079, + "timestamp": "2025-09-10 02:43:53.138324", + "step": 3118, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:53.192728", + "step": 3118, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018205152824521065, + "timestamp": "2025-09-10 02:43:53.202531", + "step": 3119, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:43:53.264025", + "step": 3119, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005881735123693943, + "timestamp": "2025-09-10 02:43:53.275918", + "step": 3120, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:53.327833", + "step": 3120, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012508176267147064, + "timestamp": "2025-09-10 02:43:53.329786", + "step": 3121, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:53.381846", + "step": 3121, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01045707706362009, + "timestamp": "2025-09-10 02:43:53.383931", + "step": 3122, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:53.436349", + "step": 3122, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005674861371517181, + "timestamp": "2025-09-10 02:43:53.439166", + "step": 3123, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:43:53.505364", + "step": 3123, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021117881406098604, + "timestamp": "2025-09-10 02:43:53.518409", + "step": 3124, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:53.570274", + "step": 3124, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003709127428010106, + "timestamp": "2025-09-10 02:43:53.572301", + "step": 3125, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:53.624394", + "step": 3125, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005707950331270695, + "timestamp": "2025-09-10 02:43:53.626620", + "step": 3126, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:53.679805", + "step": 3126, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02047325111925602, + "timestamp": "2025-09-10 02:43:53.689387", + "step": 3127, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:53.742759", + "step": 3127, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0044165621511638165, + "timestamp": "2025-09-10 02:43:53.748383", + "step": 3128, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:53.800726", + "step": 3128, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007385820150375366, + "timestamp": "2025-09-10 02:43:53.802670", + "step": 3129, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:53.856039", + "step": 3129, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007362124510109425, + "timestamp": "2025-09-10 02:43:53.865656", + "step": 3130, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:53.918940", + "step": 3130, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003541136684361845, + "timestamp": "2025-09-10 02:43:53.920941", + "step": 3131, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:53.973464", + "step": 3131, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00143281405325979, + "timestamp": "2025-09-10 02:43:53.979345", + "step": 3132, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:54.031131", + "step": 3132, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0071766795590519905, + "timestamp": "2025-09-10 02:43:54.033169", + "step": 3133, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:54.085526", + "step": 3133, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004538625478744507, + "timestamp": "2025-09-10 02:43:54.087804", + "step": 3134, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:54.140738", + "step": 3134, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016915742307901382, + "timestamp": "2025-09-10 02:43:54.149002", + "step": 3135, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:54.201621", + "step": 3135, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01447251532226801, + "timestamp": "2025-09-10 02:43:54.207234", + "step": 3136, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:54.259235", + "step": 3136, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027732182294130325, + "timestamp": "2025-09-10 02:43:54.261568", + "step": 3137, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:54.314127", + "step": 3137, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018878389382734895, + "timestamp": "2025-09-10 02:43:54.317012", + "step": 3138, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:43:54.378074", + "step": 3138, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011497820727527142, + "timestamp": "2025-09-10 02:43:54.388849", + "step": 3139, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:54.441592", + "step": 3139, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032113136257976294, + "timestamp": "2025-09-10 02:43:54.447117", + "step": 3140, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:43:54.513012", + "step": 3140, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00036818793159909546, + "timestamp": "2025-09-10 02:43:54.526626", + "step": 3141, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:54.580467", + "step": 3141, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006413311348296702, + "timestamp": "2025-09-10 02:43:54.590089", + "step": 3142, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:43:54.656624", + "step": 3142, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032100610435009, + "timestamp": "2025-09-10 02:43:54.668776", + "step": 3143, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:54.721298", + "step": 3143, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009853010065853596, + "timestamp": "2025-09-10 02:43:54.726898", + "step": 3144, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:43:54.786069", + "step": 3144, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005006527062505484, + "timestamp": "2025-09-10 02:43:54.797833", + "step": 3145, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:54.850532", + "step": 3145, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006793158245272934, + "timestamp": "2025-09-10 02:43:54.852494", + "step": 3146, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:54.904627", + "step": 3146, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006459634751081467, + "timestamp": "2025-09-10 02:43:54.906595", + "step": 3147, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:43:54.986880", + "step": 3147, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011387155391275883, + "timestamp": "2025-09-10 02:43:55.002755", + "step": 3148, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:55.055781", + "step": 3148, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008111325092613697, + "timestamp": "2025-09-10 02:43:55.057805", + "step": 3149, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:55.110230", + "step": 3149, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024190669879317284, + "timestamp": "2025-09-10 02:43:55.112563", + "step": 3150, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:55.165300", + "step": 3150, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013140763156116009, + "timestamp": "2025-09-10 02:43:55.167406", + "step": 3151, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:55.219737", + "step": 3151, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00048197299474850297, + "timestamp": "2025-09-10 02:43:55.225441", + "step": 3152, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:43:55.281989", + "step": 3152, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.036931704729795456, + "timestamp": "2025-09-10 02:43:55.293235", + "step": 3153, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:43:55.353910", + "step": 3153, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005720453802496195, + "timestamp": "2025-09-10 02:43:55.364658", + "step": 3154, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:55.417547", + "step": 3154, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008417764329351485, + "timestamp": "2025-09-10 02:43:55.419836", + "step": 3155, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:55.472590", + "step": 3155, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033593338448554277, + "timestamp": "2025-09-10 02:43:55.478370", + "step": 3156, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:55.530383", + "step": 3156, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009389621205627918, + "timestamp": "2025-09-10 02:43:55.532142", + "step": 3157, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:43:55.584168", + "step": 3157, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007109578233212233, + "timestamp": "2025-09-10 02:43:55.586178", + "step": 3158, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:55.640301", + "step": 3158, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009504547342658043, + "timestamp": "2025-09-10 02:43:55.650098", + "step": 3159, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:55.702442", + "step": 3159, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000521921378094703, + "timestamp": "2025-09-10 02:43:55.707964", + "step": 3160, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:43:55.766774", + "step": 3160, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010958666913211346, + "timestamp": "2025-09-10 02:43:55.778348", + "step": 3161, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:55.831771", + "step": 3161, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021725790575146675, + "timestamp": "2025-09-10 02:43:55.834031", + "step": 3162, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:55.886775", + "step": 3162, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003122693160548806, + "timestamp": "2025-09-10 02:43:55.895002", + "step": 3163, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:55.948007", + "step": 3163, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003681901143863797, + "timestamp": "2025-09-10 02:43:55.953550", + "step": 3164, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:56.005598", + "step": 3164, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014724161010235548, + "timestamp": "2025-09-10 02:43:56.007850", + "step": 3165, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:56.060591", + "step": 3165, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00038874123129062355, + "timestamp": "2025-09-10 02:43:56.062884", + "step": 3166, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:56.115488", + "step": 3166, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011899313889443874, + "timestamp": "2025-09-10 02:43:56.117843", + "step": 3167, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:56.171108", + "step": 3167, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036208501551300287, + "timestamp": "2025-09-10 02:43:56.176671", + "step": 3168, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:43:56.235418", + "step": 3168, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023371472489088774, + "timestamp": "2025-09-10 02:43:56.246940", + "step": 3169, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:56.299613", + "step": 3169, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010267443023622036, + "timestamp": "2025-09-10 02:43:56.301647", + "step": 3170, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:56.355144", + "step": 3170, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011791570577770472, + "timestamp": "2025-09-10 02:43:56.357386", + "step": 3171, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:56.410709", + "step": 3171, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022798667196184397, + "timestamp": "2025-09-10 02:43:56.417652", + "step": 3172, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:56.470602", + "step": 3172, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019373170798644423, + "timestamp": "2025-09-10 02:43:56.477095", + "step": 3173, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:43:56.537779", + "step": 3173, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004268851596862078, + "timestamp": "2025-09-10 02:43:56.548435", + "step": 3174, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:43:56.606077", + "step": 3174, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01142526138573885, + "timestamp": "2025-09-10 02:43:56.616494", + "step": 3175, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:56.669824", + "step": 3175, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02694218046963215, + "timestamp": "2025-09-10 02:43:56.676994", + "step": 3176, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:56.728985", + "step": 3176, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016898388043045998, + "timestamp": "2025-09-10 02:43:56.730973", + "step": 3177, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:56.784069", + "step": 3177, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030371483881026506, + "timestamp": "2025-09-10 02:43:56.786105", + "step": 3178, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:56.838255", + "step": 3178, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04292638227343559, + "timestamp": "2025-09-10 02:43:56.841522", + "step": 3179, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:43:56.893485", + "step": 3179, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00027255882741883397, + "timestamp": "2025-09-10 02:43:56.899193", + "step": 3180, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:56.951834", + "step": 3180, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032448743004351854, + "timestamp": "2025-09-10 02:43:56.954023", + "step": 3181, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:57.006707", + "step": 3181, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014674147823825479, + "timestamp": "2025-09-10 02:43:57.013425", + "step": 3182, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:57.066772", + "step": 3182, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012098170816898346, + "timestamp": "2025-09-10 02:43:57.068850", + "step": 3183, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:57.121711", + "step": 3183, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.041172586381435394, + "timestamp": "2025-09-10 02:43:57.127373", + "step": 3184, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:57.179558", + "step": 3184, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026879762299358845, + "timestamp": "2025-09-10 02:43:57.181523", + "step": 3185, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:57.234828", + "step": 3185, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037374666426330805, + "timestamp": "2025-09-10 02:43:57.244480", + "step": 3186, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:57.297360", + "step": 3186, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0074369641952216625, + "timestamp": "2025-09-10 02:43:57.300450", + "step": 3187, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:57.353206", + "step": 3187, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009308290667831898, + "timestamp": "2025-09-10 02:43:57.358849", + "step": 3188, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:43:57.411780", + "step": 3188, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00768723338842392, + "timestamp": "2025-09-10 02:43:57.422293", + "step": 3189, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:57.474997", + "step": 3189, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004245481453835964, + "timestamp": "2025-09-10 02:43:57.481406", + "step": 3190, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:57.534304", + "step": 3190, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03975455090403557, + "timestamp": "2025-09-10 02:43:57.536584", + "step": 3191, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:57.589297", + "step": 3191, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015883363783359528, + "timestamp": "2025-09-10 02:43:57.594837", + "step": 3192, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:43:57.647884", + "step": 3192, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007465971168130636, + "timestamp": "2025-09-10 02:43:57.649886", + "step": 3193, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:57.703022", + "step": 3193, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005559634882956743, + "timestamp": "2025-09-10 02:43:57.712676", + "step": 3194, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:43:57.780891", + "step": 3194, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010664467699825764, + "timestamp": "2025-09-10 02:43:57.793422", + "step": 3195, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:57.846546", + "step": 3195, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013000806793570518, + "timestamp": "2025-09-10 02:43:57.852306", + "step": 3196, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:57.903943", + "step": 3196, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021773725748062134, + "timestamp": "2025-09-10 02:43:57.906054", + "step": 3197, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:43:57.979490", + "step": 3197, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020083343610167503, + "timestamp": "2025-09-10 02:43:57.993237", + "step": 3198, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:58.046155", + "step": 3198, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013784419279545546, + "timestamp": "2025-09-10 02:43:58.048137", + "step": 3199, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:58.100654", + "step": 3199, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006651018746197224, + "timestamp": "2025-09-10 02:43:58.106304", + "step": 3200, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:58.158887", + "step": 3200, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001875710440799594, + "timestamp": "2025-09-10 02:43:58.161523", + "step": 3201, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:43:58.229967", + "step": 3201, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010077283717691898, + "timestamp": "2025-09-10 02:43:58.242641", + "step": 3202, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:43:58.303178", + "step": 3202, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010139418300241232, + "timestamp": "2025-09-10 02:43:58.313971", + "step": 3203, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:43:58.375455", + "step": 3203, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00368149159476161, + "timestamp": "2025-09-10 02:43:58.387368", + "step": 3204, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:43:58.439768", + "step": 3204, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009633967652916908, + "timestamp": "2025-09-10 02:43:58.441682", + "step": 3205, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:58.494310", + "step": 3205, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01818724349141121, + "timestamp": "2025-09-10 02:43:58.496444", + "step": 3206, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:58.549371", + "step": 3206, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032443441450595856, + "timestamp": "2025-09-10 02:43:58.557237", + "step": 3207, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:58.609966", + "step": 3207, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024807697627693415, + "timestamp": "2025-09-10 02:43:58.615695", + "step": 3208, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:58.668226", + "step": 3208, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0056392294354736805, + "timestamp": "2025-09-10 02:43:58.674329", + "step": 3209, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:43:58.747634", + "step": 3209, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009499141946434975, + "timestamp": "2025-09-10 02:43:58.761304", + "step": 3210, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:43:58.829215", + "step": 3210, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003111324505880475, + "timestamp": "2025-09-10 02:43:58.841851", + "step": 3211, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:58.895081", + "step": 3211, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002740873722359538, + "timestamp": "2025-09-10 02:43:58.900651", + "step": 3212, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:43:58.952916", + "step": 3212, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032866480760276318, + "timestamp": "2025-09-10 02:43:58.959173", + "step": 3213, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:43:59.012646", + "step": 3213, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014923141337931156, + "timestamp": "2025-09-10 02:43:59.022257", + "step": 3214, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:59.075168", + "step": 3214, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027747994754463434, + "timestamp": "2025-09-10 02:43:59.077241", + "step": 3215, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:59.130704", + "step": 3215, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035309165250509977, + "timestamp": "2025-09-10 02:43:59.136596", + "step": 3216, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:43:59.196357", + "step": 3216, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007304273080080748, + "timestamp": "2025-09-10 02:43:59.208427", + "step": 3217, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:43:59.266523", + "step": 3217, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035353435669094324, + "timestamp": "2025-09-10 02:43:59.277015", + "step": 3218, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:59.329835", + "step": 3218, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021327794529497623, + "timestamp": "2025-09-10 02:43:59.331974", + "step": 3219, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:59.384552", + "step": 3219, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038116122595965862, + "timestamp": "2025-09-10 02:43:59.390456", + "step": 3220, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:59.442354", + "step": 3220, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004772346466779709, + "timestamp": "2025-09-10 02:43:59.444482", + "step": 3221, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:43:59.497266", + "step": 3221, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002986540552228689, + "timestamp": "2025-09-10 02:43:59.505590", + "step": 3222, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:59.559688", + "step": 3222, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02876511588692665, + "timestamp": "2025-09-10 02:43:59.561770", + "step": 3223, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:59.614381", + "step": 3223, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02211517095565796, + "timestamp": "2025-09-10 02:43:59.620137", + "step": 3224, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:59.672538", + "step": 3224, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003185128327459097, + "timestamp": "2025-09-10 02:43:59.674703", + "step": 3225, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:43:59.726981", + "step": 3225, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002619502367451787, + "timestamp": "2025-09-10 02:43:59.729207", + "step": 3226, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:43:59.781562", + "step": 3226, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007608884014189243, + "timestamp": "2025-09-10 02:43:59.783786", + "step": 3227, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:43:59.836506", + "step": 3227, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01058508176356554, + "timestamp": "2025-09-10 02:43:59.842050", + "step": 3228, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:43:59.893836", + "step": 3228, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037439449224621058, + "timestamp": "2025-09-10 02:43:59.895841", + "step": 3229, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:43:59.948269", + "step": 3229, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023789783008396626, + "timestamp": "2025-09-10 02:43:59.950331", + "step": 3230, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:44:00.007327", + "step": 3230, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0311024971306324, + "timestamp": "2025-09-10 02:44:00.017777", + "step": 3231, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:00.071248", + "step": 3231, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029404382221400738, + "timestamp": "2025-09-10 02:44:00.076898", + "step": 3232, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:00.128968", + "step": 3232, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003119255183264613, + "timestamp": "2025-09-10 02:44:00.130922", + "step": 3233, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:00.183286", + "step": 3233, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022972356528043747, + "timestamp": "2025-09-10 02:44:00.185476", + "step": 3234, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:44:16.970765", + "step": 3234, + "epoch": 2 + }, + { + "type": "pplx", + "content": 21072886.619915567, + "timestamp": "2025-09-10 02:44:16.973571", + "step": 3234, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:17.028228", + "step": 3234, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005306145641952753, + "timestamp": "2025-09-10 02:44:17.030566", + "step": 3235, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:17.084533", + "step": 3235, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007551746908575296, + "timestamp": "2025-09-10 02:44:17.091006", + "step": 3236, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:17.148718", + "step": 3236, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0039247856475412846, + "timestamp": "2025-09-10 02:44:17.153810", + "step": 3237, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:44:17.213675", + "step": 3237, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018452981486916542, + "timestamp": "2025-09-10 02:44:17.223474", + "step": 3238, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:17.276990", + "step": 3238, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010647733695805073, + "timestamp": "2025-09-10 02:44:17.285212", + "step": 3239, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:17.343826", + "step": 3239, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016128303250297904, + "timestamp": "2025-09-10 02:44:17.350110", + "step": 3240, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:17.404554", + "step": 3240, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033778951037675142, + "timestamp": "2025-09-10 02:44:17.406570", + "step": 3241, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:17.460658", + "step": 3241, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.037463899701833725, + "timestamp": "2025-09-10 02:44:17.466552", + "step": 3242, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:17.520706", + "step": 3242, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008468790911138058, + "timestamp": "2025-09-10 02:44:17.523119", + "step": 3243, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 784 + ], + "flops": 15680095254592.0 + }, + "timestamp": "2025-09-10 02:44:17.636994", + "step": 3243, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017466222867369652, + "timestamp": "2025-09-10 02:44:17.659922", + "step": 3244, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:44:17.714606", + "step": 3244, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002223787596449256, + "timestamp": "2025-09-10 02:44:17.724910", + "step": 3245, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:17.778190", + "step": 3245, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012982979416847229, + "timestamp": "2025-09-10 02:44:17.780392", + "step": 3246, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:17.834243", + "step": 3246, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006452381145209074, + "timestamp": "2025-09-10 02:44:17.841932", + "step": 3247, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:17.896423", + "step": 3247, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009165070950984955, + "timestamp": "2025-09-10 02:44:17.902794", + "step": 3248, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:44:17.970803", + "step": 3248, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018486840417608619, + "timestamp": "2025-09-10 02:44:17.984558", + "step": 3249, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:44:18.038415", + "step": 3249, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02603820338845253, + "timestamp": "2025-09-10 02:44:18.041590", + "step": 3250, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:18.094948", + "step": 3250, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002727191662415862, + "timestamp": "2025-09-10 02:44:18.097222", + "step": 3251, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:18.150020", + "step": 3251, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02278628572821617, + "timestamp": "2025-09-10 02:44:18.156088", + "step": 3252, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:18.209228", + "step": 3252, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002900347812101245, + "timestamp": "2025-09-10 02:44:18.211614", + "step": 3253, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:18.264896", + "step": 3253, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00995334517210722, + "timestamp": "2025-09-10 02:44:18.268172", + "step": 3254, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:18.321906", + "step": 3254, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008439576486125588, + "timestamp": "2025-09-10 02:44:18.324305", + "step": 3255, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:18.377885", + "step": 3255, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006541337352246046, + "timestamp": "2025-09-10 02:44:18.384137", + "step": 3256, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:18.437274", + "step": 3256, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02270149253308773, + "timestamp": "2025-09-10 02:44:18.439652", + "step": 3257, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:18.493137", + "step": 3257, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006551208789460361, + "timestamp": "2025-09-10 02:44:18.495442", + "step": 3258, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:18.548847", + "step": 3258, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011171090882271528, + "timestamp": "2025-09-10 02:44:18.551627", + "step": 3259, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:18.605121", + "step": 3259, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019644513726234436, + "timestamp": "2025-09-10 02:44:18.611154", + "step": 3260, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:44:18.676209", + "step": 3260, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007226437446661294, + "timestamp": "2025-09-10 02:44:18.689397", + "step": 3261, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:18.744002", + "step": 3261, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001632618485018611, + "timestamp": "2025-09-10 02:44:18.750154", + "step": 3262, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:18.803676", + "step": 3262, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021618137136101723, + "timestamp": "2025-09-10 02:44:18.805851", + "step": 3263, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 9920060287936.0 + }, + "timestamp": "2025-09-10 02:44:18.880205", + "step": 3263, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005943900439888239, + "timestamp": "2025-09-10 02:44:18.894895", + "step": 3264, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:44:18.955298", + "step": 3264, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017461031675338745, + "timestamp": "2025-09-10 02:44:18.966865", + "step": 3265, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:19.022686", + "step": 3265, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013536013662815094, + "timestamp": "2025-09-10 02:44:19.025313", + "step": 3266, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:19.079912", + "step": 3266, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010626944713294506, + "timestamp": "2025-09-10 02:44:19.082143", + "step": 3267, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:19.135395", + "step": 3267, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007986924611032009, + "timestamp": "2025-09-10 02:44:19.141258", + "step": 3268, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:19.194374", + "step": 3268, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010827191174030304, + "timestamp": "2025-09-10 02:44:19.196666", + "step": 3269, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:19.249972", + "step": 3269, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007971197366714478, + "timestamp": "2025-09-10 02:44:19.252366", + "step": 3270, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:19.306000", + "step": 3270, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00854082778096199, + "timestamp": "2025-09-10 02:44:19.308038", + "step": 3271, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:19.360580", + "step": 3271, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005443661939352751, + "timestamp": "2025-09-10 02:44:19.366382", + "step": 3272, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:19.419358", + "step": 3272, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0042872135527431965, + "timestamp": "2025-09-10 02:44:19.421324", + "step": 3273, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:19.474137", + "step": 3273, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010055058635771275, + "timestamp": "2025-09-10 02:44:19.476328", + "step": 3274, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:19.528568", + "step": 3274, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0064366958104074, + "timestamp": "2025-09-10 02:44:19.531591", + "step": 3275, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:19.584395", + "step": 3275, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013839764287695289, + "timestamp": "2025-09-10 02:44:19.590186", + "step": 3276, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:19.643021", + "step": 3276, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011878689983859658, + "timestamp": "2025-09-10 02:44:19.645032", + "step": 3277, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:19.697720", + "step": 3277, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009049239451996982, + "timestamp": "2025-09-10 02:44:19.699856", + "step": 3278, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:44:19.754805", + "step": 3278, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002064666012302041, + "timestamp": "2025-09-10 02:44:19.764598", + "step": 3279, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:19.818941", + "step": 3279, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002905084053054452, + "timestamp": "2025-09-10 02:44:19.827307", + "step": 3280, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:19.879565", + "step": 3280, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002872213488444686, + "timestamp": "2025-09-10 02:44:19.881945", + "step": 3281, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:19.934949", + "step": 3281, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005986655596643686, + "timestamp": "2025-09-10 02:44:19.941484", + "step": 3282, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:44:20.003286", + "step": 3282, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00405543390661478, + "timestamp": "2025-09-10 02:44:20.014239", + "step": 3283, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:20.067188", + "step": 3283, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007533874828368425, + "timestamp": "2025-09-10 02:44:20.072873", + "step": 3284, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:20.126007", + "step": 3284, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004359562881290913, + "timestamp": "2025-09-10 02:44:20.128671", + "step": 3285, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:20.181505", + "step": 3285, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004524012096226215, + "timestamp": "2025-09-10 02:44:20.188275", + "step": 3286, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:20.241398", + "step": 3286, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02158978395164013, + "timestamp": "2025-09-10 02:44:20.243702", + "step": 3287, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:20.305155", + "step": 3287, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010425695218145847, + "timestamp": "2025-09-10 02:44:20.310763", + "step": 3288, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:20.363305", + "step": 3288, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006829250603914261, + "timestamp": "2025-09-10 02:44:20.371489", + "step": 3289, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:20.429336", + "step": 3289, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00593261793255806, + "timestamp": "2025-09-10 02:44:20.431473", + "step": 3290, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:44:20.484946", + "step": 3290, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007732923608273268, + "timestamp": "2025-09-10 02:44:20.494520", + "step": 3291, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:20.547394", + "step": 3291, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028162673115730286, + "timestamp": "2025-09-10 02:44:20.553283", + "step": 3292, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:20.606411", + "step": 3292, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004819825291633606, + "timestamp": "2025-09-10 02:44:20.620002", + "step": 3293, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:20.676430", + "step": 3293, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016504162922501564, + "timestamp": "2025-09-10 02:44:20.684386", + "step": 3294, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:20.740783", + "step": 3294, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006112895789556205, + "timestamp": "2025-09-10 02:44:20.743030", + "step": 3295, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:20.796494", + "step": 3295, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019578339997678995, + "timestamp": "2025-09-10 02:44:20.807642", + "step": 3296, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:20.877319", + "step": 3296, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018936438485980034, + "timestamp": "2025-09-10 02:44:20.880668", + "step": 3297, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:44:20.950824", + "step": 3297, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016984677640721202, + "timestamp": "2025-09-10 02:44:20.963399", + "step": 3298, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:21.017966", + "step": 3298, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003191543510183692, + "timestamp": "2025-09-10 02:44:21.020366", + "step": 3299, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:21.073349", + "step": 3299, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005853749345988035, + "timestamp": "2025-09-10 02:44:21.080173", + "step": 3300, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:21.132549", + "step": 3300, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004540544003248215, + "timestamp": "2025-09-10 02:44:21.134846", + "step": 3301, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:21.188075", + "step": 3301, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002611069008708, + "timestamp": "2025-09-10 02:44:21.190311", + "step": 3302, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:21.243776", + "step": 3302, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01286126859486103, + "timestamp": "2025-09-10 02:44:21.246079", + "step": 3303, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:21.299389", + "step": 3303, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012683229520916939, + "timestamp": "2025-09-10 02:44:21.305576", + "step": 3304, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:21.359326", + "step": 3304, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009508299408480525, + "timestamp": "2025-09-10 02:44:21.361420", + "step": 3305, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:21.414975", + "step": 3305, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01446224283427, + "timestamp": "2025-09-10 02:44:21.417543", + "step": 3306, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:21.470712", + "step": 3306, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017067965120077133, + "timestamp": "2025-09-10 02:44:21.472914", + "step": 3307, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:21.526425", + "step": 3307, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023929893970489502, + "timestamp": "2025-09-10 02:44:21.532768", + "step": 3308, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:21.585447", + "step": 3308, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002198033267632127, + "timestamp": "2025-09-10 02:44:21.593525", + "step": 3309, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:21.647026", + "step": 3309, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02316807396709919, + "timestamp": "2025-09-10 02:44:21.649039", + "step": 3310, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:21.702430", + "step": 3310, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022349718492478132, + "timestamp": "2025-09-10 02:44:21.708849", + "step": 3311, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:44:21.766916", + "step": 3311, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02320941723883152, + "timestamp": "2025-09-10 02:44:21.778144", + "step": 3312, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:21.831235", + "step": 3312, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0041513568721711636, + "timestamp": "2025-09-10 02:44:21.837629", + "step": 3313, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:21.890631", + "step": 3313, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030766088515520096, + "timestamp": "2025-09-10 02:44:21.893775", + "step": 3314, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:21.946848", + "step": 3314, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012035499326884747, + "timestamp": "2025-09-10 02:44:21.949148", + "step": 3315, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:44:22.002046", + "step": 3315, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02300579473376274, + "timestamp": "2025-09-10 02:44:22.008790", + "step": 3316, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:22.061436", + "step": 3316, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000930184789467603, + "timestamp": "2025-09-10 02:44:22.064700", + "step": 3317, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:22.118316", + "step": 3317, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021347789093852043, + "timestamp": "2025-09-10 02:44:22.121455", + "step": 3318, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:44:22.182919", + "step": 3318, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012315197382122278, + "timestamp": "2025-09-10 02:44:22.194018", + "step": 3319, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:44:22.248014", + "step": 3319, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06900060921907425, + "timestamp": "2025-09-10 02:44:22.258452", + "step": 3320, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:44:22.318054", + "step": 3320, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010998686775565147, + "timestamp": "2025-09-10 02:44:22.329883", + "step": 3321, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:22.383200", + "step": 3321, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017296746373176575, + "timestamp": "2025-09-10 02:44:22.385811", + "step": 3322, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:22.439196", + "step": 3322, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01512966025620699, + "timestamp": "2025-09-10 02:44:22.442275", + "step": 3323, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:22.495366", + "step": 3323, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018649300909601152, + "timestamp": "2025-09-10 02:44:22.501516", + "step": 3324, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:44:22.555205", + "step": 3324, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036657515447586775, + "timestamp": "2025-09-10 02:44:22.565663", + "step": 3325, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 576 + ], + "flops": 11520070000896.0 + }, + "timestamp": "2025-09-10 02:44:22.649918", + "step": 3325, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00610398082062602, + "timestamp": "2025-09-10 02:44:22.665436", + "step": 3326, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:22.720106", + "step": 3326, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034286784939467907, + "timestamp": "2025-09-10 02:44:22.725333", + "step": 3327, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:22.779051", + "step": 3327, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004797658883035183, + "timestamp": "2025-09-10 02:44:22.785144", + "step": 3328, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:22.837957", + "step": 3328, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006010635406710207, + "timestamp": "2025-09-10 02:44:22.840739", + "step": 3329, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:44:22.901319", + "step": 3329, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001819917350076139, + "timestamp": "2025-09-10 02:44:22.912009", + "step": 3330, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:44:22.965458", + "step": 3330, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010529962601140141, + "timestamp": "2025-09-10 02:44:22.967774", + "step": 3331, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:23.021557", + "step": 3331, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020855003967881203, + "timestamp": "2025-09-10 02:44:23.027721", + "step": 3332, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:23.080274", + "step": 3332, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004689569119364023, + "timestamp": "2025-09-10 02:44:23.082239", + "step": 3333, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:23.135311", + "step": 3333, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03814322128891945, + "timestamp": "2025-09-10 02:44:23.138604", + "step": 3334, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:44:23.194658", + "step": 3334, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008889822289347649, + "timestamp": "2025-09-10 02:44:23.196785", + "step": 3335, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:23.250020", + "step": 3335, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006007408257573843, + "timestamp": "2025-09-10 02:44:23.255985", + "step": 3336, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:23.308876", + "step": 3336, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004595792852342129, + "timestamp": "2025-09-10 02:44:23.315096", + "step": 3337, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:23.368313", + "step": 3337, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029184441082179546, + "timestamp": "2025-09-10 02:44:23.370430", + "step": 3338, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:23.424383", + "step": 3338, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0044219414703547955, + "timestamp": "2025-09-10 02:44:23.432077", + "step": 3339, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:23.485388", + "step": 3339, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006493097636848688, + "timestamp": "2025-09-10 02:44:23.491137", + "step": 3340, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:23.543914", + "step": 3340, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017229022923856974, + "timestamp": "2025-09-10 02:44:23.546000", + "step": 3341, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:44:23.598927", + "step": 3341, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002881512511521578, + "timestamp": "2025-09-10 02:44:23.601145", + "step": 3342, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:44:23.655230", + "step": 3342, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02432120032608509, + "timestamp": "2025-09-10 02:44:23.657380", + "step": 3343, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:23.711987", + "step": 3343, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0251313503831625, + "timestamp": "2025-09-10 02:44:23.718027", + "step": 3344, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:44:23.774774", + "step": 3344, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031528447289019823, + "timestamp": "2025-09-10 02:44:23.785979", + "step": 3345, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:23.839499", + "step": 3345, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008504916331730783, + "timestamp": "2025-09-10 02:44:23.841778", + "step": 3346, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:44:23.895535", + "step": 3346, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008890111930668354, + "timestamp": "2025-09-10 02:44:23.905156", + "step": 3347, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:23.958635", + "step": 3347, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02196441777050495, + "timestamp": "2025-09-10 02:44:23.964431", + "step": 3348, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:24.017304", + "step": 3348, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007154666353017092, + "timestamp": "2025-09-10 02:44:24.019258", + "step": 3349, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:24.072405", + "step": 3349, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002116176765412092, + "timestamp": "2025-09-10 02:44:24.074613", + "step": 3350, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:24.128259", + "step": 3350, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.07852780818939209, + "timestamp": "2025-09-10 02:44:24.130642", + "step": 3351, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:24.183591", + "step": 3351, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003637216577772051, + "timestamp": "2025-09-10 02:44:24.189784", + "step": 3352, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:24.242877", + "step": 3352, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008689274080097675, + "timestamp": "2025-09-10 02:44:24.245431", + "step": 3353, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:44:24.299599", + "step": 3353, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033759246580302715, + "timestamp": "2025-09-10 02:44:24.309176", + "step": 3354, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:24.362620", + "step": 3354, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015087587526068091, + "timestamp": "2025-09-10 02:44:24.364885", + "step": 3355, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:24.418282", + "step": 3355, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001986152259632945, + "timestamp": "2025-09-10 02:44:24.424255", + "step": 3356, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:44:24.477341", + "step": 3356, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012343788519501686, + "timestamp": "2025-09-10 02:44:24.487888", + "step": 3357, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:44:24.541321", + "step": 3357, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00478591863065958, + "timestamp": "2025-09-10 02:44:24.543595", + "step": 3358, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:44:24.597361", + "step": 3358, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016360685229301453, + "timestamp": "2025-09-10 02:44:24.606964", + "step": 3359, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:24.660154", + "step": 3359, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.043959442526102066, + "timestamp": "2025-09-10 02:44:24.665667", + "step": 3360, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:24.718894", + "step": 3360, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015006103552877903, + "timestamp": "2025-09-10 02:44:24.725066", + "step": 3361, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:24.778202", + "step": 3361, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010425652144476771, + "timestamp": "2025-09-10 02:44:24.784520", + "step": 3362, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:24.838136", + "step": 3362, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009364688768982887, + "timestamp": "2025-09-10 02:44:24.839923", + "step": 3363, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:24.893119", + "step": 3363, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003628287522587925, + "timestamp": "2025-09-10 02:44:24.898802", + "step": 3364, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:24.951293", + "step": 3364, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002183773322030902, + "timestamp": "2025-09-10 02:44:24.953614", + "step": 3365, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:25.006894", + "step": 3365, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010487676598131657, + "timestamp": "2025-09-10 02:44:25.009234", + "step": 3366, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:25.062595", + "step": 3366, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013433645479381084, + "timestamp": "2025-09-10 02:44:25.064936", + "step": 3367, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:25.118624", + "step": 3367, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008830679580569267, + "timestamp": "2025-09-10 02:44:25.125553", + "step": 3368, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:25.178205", + "step": 3368, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007802027743309736, + "timestamp": "2025-09-10 02:44:25.180225", + "step": 3369, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:25.233707", + "step": 3369, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0065420083701610565, + "timestamp": "2025-09-10 02:44:25.235527", + "step": 3370, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:25.288164", + "step": 3370, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002406763145700097, + "timestamp": "2025-09-10 02:44:25.289964", + "step": 3371, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:25.342920", + "step": 3371, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017273176927119493, + "timestamp": "2025-09-10 02:44:25.351828", + "step": 3372, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:25.404527", + "step": 3372, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030210746452212334, + "timestamp": "2025-09-10 02:44:25.407431", + "step": 3373, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:25.460205", + "step": 3373, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01453532837331295, + "timestamp": "2025-09-10 02:44:25.462459", + "step": 3374, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:25.516047", + "step": 3374, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014118814142420888, + "timestamp": "2025-09-10 02:44:25.523839", + "step": 3375, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:44:25.592105", + "step": 3375, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01583327353000641, + "timestamp": "2025-09-10 02:44:25.605490", + "step": 3376, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:44:25.658433", + "step": 3376, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002258200664073229, + "timestamp": "2025-09-10 02:44:25.660470", + "step": 3377, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:44:25.713128", + "step": 3377, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015727082267403603, + "timestamp": "2025-09-10 02:44:25.714902", + "step": 3378, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:25.767641", + "step": 3378, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023107485845685005, + "timestamp": "2025-09-10 02:44:25.769829", + "step": 3379, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:44:25.824492", + "step": 3379, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011981871910393238, + "timestamp": "2025-09-10 02:44:25.835050", + "step": 3380, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:44:25.895341", + "step": 3380, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010925479233264923, + "timestamp": "2025-09-10 02:44:25.907390", + "step": 3381, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:44:42.755425", + "step": 3381, + "epoch": 2 + }, + { + "type": "pplx", + "content": 22882422.552492164, + "timestamp": "2025-09-10 02:44:42.758122", + "step": 3381, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:44:42.824424", + "step": 3381, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003781441133469343, + "timestamp": "2025-09-10 02:44:42.837000", + "step": 3382, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:44:42.895383", + "step": 3382, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014598295092582703, + "timestamp": "2025-09-10 02:44:42.905814", + "step": 3383, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:44:42.960357", + "step": 3383, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004385597538203001, + "timestamp": "2025-09-10 02:44:42.970554", + "step": 3384, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:44:43.030679", + "step": 3384, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004374785348773003, + "timestamp": "2025-09-10 02:44:43.042659", + "step": 3385, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:43.096292", + "step": 3385, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020921386312693357, + "timestamp": "2025-09-10 02:44:43.104092", + "step": 3386, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:43.157423", + "step": 3386, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02318628691136837, + "timestamp": "2025-09-10 02:44:43.163432", + "step": 3387, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:43.220104", + "step": 3387, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004269985016435385, + "timestamp": "2025-09-10 02:44:43.226129", + "step": 3388, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:44:43.282653", + "step": 3388, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007775088306516409, + "timestamp": "2025-09-10 02:44:43.286014", + "step": 3389, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:43.341450", + "step": 3389, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005463188048452139, + "timestamp": "2025-09-10 02:44:43.343579", + "step": 3390, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:44:43.410243", + "step": 3390, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012385325506329536, + "timestamp": "2025-09-10 02:44:43.418600", + "step": 3391, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:43.475393", + "step": 3391, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016082149231806397, + "timestamp": "2025-09-10 02:44:43.482021", + "step": 3392, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:43.536110", + "step": 3392, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016975795850157738, + "timestamp": "2025-09-10 02:44:43.538330", + "step": 3393, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:44:43.594918", + "step": 3393, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019571630284190178, + "timestamp": "2025-09-10 02:44:43.604689", + "step": 3394, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:43.660007", + "step": 3394, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001042350078932941, + "timestamp": "2025-09-10 02:44:43.662076", + "step": 3395, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:43.715154", + "step": 3395, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00979469995945692, + "timestamp": "2025-09-10 02:44:43.726065", + "step": 3396, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:44:43.782445", + "step": 3396, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018953384831547737, + "timestamp": "2025-09-10 02:44:43.792214", + "step": 3397, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:44:43.860776", + "step": 3397, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004509978927671909, + "timestamp": "2025-09-10 02:44:43.871836", + "step": 3398, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:43.926224", + "step": 3398, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006286496762186289, + "timestamp": "2025-09-10 02:44:43.933066", + "step": 3399, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:43.995215", + "step": 3399, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012036184780299664, + "timestamp": "2025-09-10 02:44:44.001091", + "step": 3400, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:44.054376", + "step": 3400, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002427857369184494, + "timestamp": "2025-09-10 02:44:44.056852", + "step": 3401, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:44:44.112975", + "step": 3401, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00120683538261801, + "timestamp": "2025-09-10 02:44:44.122368", + "step": 3402, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:44:44.191450", + "step": 3402, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018053905805572867, + "timestamp": "2025-09-10 02:44:44.204187", + "step": 3403, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:44.258302", + "step": 3403, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008580326684750617, + "timestamp": "2025-09-10 02:44:44.264074", + "step": 3404, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:44.319006", + "step": 3404, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.045827168971300125, + "timestamp": "2025-09-10 02:44:44.321021", + "step": 3405, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:44:44.374769", + "step": 3405, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007903196848928928, + "timestamp": "2025-09-10 02:44:44.384334", + "step": 3406, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:44.438333", + "step": 3406, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015009711496531963, + "timestamp": "2025-09-10 02:44:44.469175", + "step": 3407, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:44.534255", + "step": 3407, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03113992139697075, + "timestamp": "2025-09-10 02:44:44.542313", + "step": 3408, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:44:44.603550", + "step": 3408, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007063610944896936, + "timestamp": "2025-09-10 02:44:44.614726", + "step": 3409, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:44.669277", + "step": 3409, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01674896851181984, + "timestamp": "2025-09-10 02:44:44.671400", + "step": 3410, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:44:44.729239", + "step": 3410, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00911670457571745, + "timestamp": "2025-09-10 02:44:44.739679", + "step": 3411, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:44.794340", + "step": 3411, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006235290318727493, + "timestamp": "2025-09-10 02:44:44.801648", + "step": 3412, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:44.857328", + "step": 3412, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006860231515020132, + "timestamp": "2025-09-10 02:44:44.860076", + "step": 3413, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:44.913896", + "step": 3413, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009075680747628212, + "timestamp": "2025-09-10 02:44:44.919485", + "step": 3414, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:44.979684", + "step": 3414, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019099082564935088, + "timestamp": "2025-09-10 02:44:44.987206", + "step": 3415, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:45.049015", + "step": 3415, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010206512175500393, + "timestamp": "2025-09-10 02:44:45.054988", + "step": 3416, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:45.107122", + "step": 3416, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00477524334564805, + "timestamp": "2025-09-10 02:44:45.109927", + "step": 3417, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:45.162846", + "step": 3417, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005614953581243753, + "timestamp": "2025-09-10 02:44:45.165127", + "step": 3418, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:44:45.225446", + "step": 3418, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003601041389629245, + "timestamp": "2025-09-10 02:44:45.236164", + "step": 3419, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:45.289662", + "step": 3419, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013540426269173622, + "timestamp": "2025-09-10 02:44:45.298536", + "step": 3420, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:44:45.370197", + "step": 3420, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005283182021230459, + "timestamp": "2025-09-10 02:44:45.385124", + "step": 3421, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:45.448208", + "step": 3421, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03090631775557995, + "timestamp": "2025-09-10 02:44:45.454068", + "step": 3422, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:45.512519", + "step": 3422, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003905541030690074, + "timestamp": "2025-09-10 02:44:45.523344", + "step": 3423, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:45.584422", + "step": 3423, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010258356109261513, + "timestamp": "2025-09-10 02:44:45.590304", + "step": 3424, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:45.646403", + "step": 3424, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035311724059283733, + "timestamp": "2025-09-10 02:44:45.652522", + "step": 3425, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:45.707375", + "step": 3425, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008509389124810696, + "timestamp": "2025-09-10 02:44:45.709518", + "step": 3426, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:45.763294", + "step": 3426, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000998670351691544, + "timestamp": "2025-09-10 02:44:45.765471", + "step": 3427, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:45.818667", + "step": 3427, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04099467024207115, + "timestamp": "2025-09-10 02:44:45.826928", + "step": 3428, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:45.879584", + "step": 3428, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01535296905785799, + "timestamp": "2025-09-10 02:44:45.882211", + "step": 3429, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:45.935602", + "step": 3429, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006567057222127914, + "timestamp": "2025-09-10 02:44:45.937730", + "step": 3430, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:45.995766", + "step": 3430, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005960697773844004, + "timestamp": "2025-09-10 02:44:46.001112", + "step": 3431, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:46.070051", + "step": 3431, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020980017259716988, + "timestamp": "2025-09-10 02:44:46.080516", + "step": 3432, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:46.133215", + "step": 3432, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021771802566945553, + "timestamp": "2025-09-10 02:44:46.137087", + "step": 3433, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:44:46.197699", + "step": 3433, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008880245732143521, + "timestamp": "2025-09-10 02:44:46.199779", + "step": 3434, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:46.252974", + "step": 3434, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004372372291982174, + "timestamp": "2025-09-10 02:44:46.255288", + "step": 3435, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:46.311548", + "step": 3435, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038417575415223837, + "timestamp": "2025-09-10 02:44:46.317659", + "step": 3436, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:46.371869", + "step": 3436, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010600702371448278, + "timestamp": "2025-09-10 02:44:46.374673", + "step": 3437, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:46.427879", + "step": 3437, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001106555457226932, + "timestamp": "2025-09-10 02:44:46.431913", + "step": 3438, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:44:46.493359", + "step": 3438, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005646290956065059, + "timestamp": "2025-09-10 02:44:46.504260", + "step": 3439, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:46.559268", + "step": 3439, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005553652532398701, + "timestamp": "2025-09-10 02:44:46.572149", + "step": 3440, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:46.629127", + "step": 3440, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006433610338717699, + "timestamp": "2025-09-10 02:44:46.637322", + "step": 3441, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:46.699838", + "step": 3441, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0076807462610304356, + "timestamp": "2025-09-10 02:44:46.707516", + "step": 3442, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:46.760771", + "step": 3442, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005716619198210537, + "timestamp": "2025-09-10 02:44:46.762771", + "step": 3443, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:46.816841", + "step": 3443, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006439946126192808, + "timestamp": "2025-09-10 02:44:46.826682", + "step": 3444, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:46.879446", + "step": 3444, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033580020535737276, + "timestamp": "2025-09-10 02:44:46.886017", + "step": 3445, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:46.940775", + "step": 3445, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006483376491814852, + "timestamp": "2025-09-10 02:44:46.944873", + "step": 3446, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:46.999588", + "step": 3446, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033562954049557447, + "timestamp": "2025-09-10 02:44:47.007313", + "step": 3447, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:47.061791", + "step": 3447, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017835830803960562, + "timestamp": "2025-09-10 02:44:47.067395", + "step": 3448, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:47.121574", + "step": 3448, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003997988998889923, + "timestamp": "2025-09-10 02:44:47.129626", + "step": 3449, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:44:47.185941", + "step": 3449, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004992308560758829, + "timestamp": "2025-09-10 02:44:47.195520", + "step": 3450, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:47.248617", + "step": 3450, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021727483719587326, + "timestamp": "2025-09-10 02:44:47.250792", + "step": 3451, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:47.304195", + "step": 3451, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028200071305036545, + "timestamp": "2025-09-10 02:44:47.311212", + "step": 3452, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:47.364206", + "step": 3452, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013704189099371433, + "timestamp": "2025-09-10 02:44:47.366419", + "step": 3453, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:47.420911", + "step": 3453, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029978693928569555, + "timestamp": "2025-09-10 02:44:47.423004", + "step": 3454, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:44:47.477389", + "step": 3454, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004155627451837063, + "timestamp": "2025-09-10 02:44:47.487193", + "step": 3455, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:44:47.554016", + "step": 3455, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00584846455603838, + "timestamp": "2025-09-10 02:44:47.567018", + "step": 3456, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:47.620407", + "step": 3456, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005385205149650574, + "timestamp": "2025-09-10 02:44:47.626074", + "step": 3457, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:47.679844", + "step": 3457, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034869180526584387, + "timestamp": "2025-09-10 02:44:47.682685", + "step": 3458, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:47.735416", + "step": 3458, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028078759089112282, + "timestamp": "2025-09-10 02:44:47.738327", + "step": 3459, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:44:47.796901", + "step": 3459, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001354985754005611, + "timestamp": "2025-09-10 02:44:47.808132", + "step": 3460, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:47.861000", + "step": 3460, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005084726959466934, + "timestamp": "2025-09-10 02:44:47.867405", + "step": 3461, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:47.921930", + "step": 3461, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004792199470102787, + "timestamp": "2025-09-10 02:44:47.924011", + "step": 3462, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:44:47.981300", + "step": 3462, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016709257615730166, + "timestamp": "2025-09-10 02:44:47.983586", + "step": 3463, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:44:48.044187", + "step": 3463, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01289287954568863, + "timestamp": "2025-09-10 02:44:48.055684", + "step": 3464, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:44:48.108568", + "step": 3464, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002130532404407859, + "timestamp": "2025-09-10 02:44:48.118606", + "step": 3465, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:48.172349", + "step": 3465, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025333818048238754, + "timestamp": "2025-09-10 02:44:48.174604", + "step": 3466, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:48.228450", + "step": 3466, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005801789229735732, + "timestamp": "2025-09-10 02:44:48.231182", + "step": 3467, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:48.283882", + "step": 3467, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009072088869288564, + "timestamp": "2025-09-10 02:44:48.289506", + "step": 3468, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:48.350774", + "step": 3468, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016404151916503906, + "timestamp": "2025-09-10 02:44:48.352932", + "step": 3469, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:48.409303", + "step": 3469, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00040816678665578365, + "timestamp": "2025-09-10 02:44:48.411601", + "step": 3470, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:48.465079", + "step": 3470, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018683952512219548, + "timestamp": "2025-09-10 02:44:48.467181", + "step": 3471, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:48.521728", + "step": 3471, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008625198155641556, + "timestamp": "2025-09-10 02:44:48.527598", + "step": 3472, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:48.583419", + "step": 3472, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001849312917329371, + "timestamp": "2025-09-10 02:44:48.586223", + "step": 3473, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:48.639672", + "step": 3473, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027316075284034014, + "timestamp": "2025-09-10 02:44:48.641860", + "step": 3474, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:48.694349", + "step": 3474, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012463848106563091, + "timestamp": "2025-09-10 02:44:48.699869", + "step": 3475, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:48.753550", + "step": 3475, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006688945926725864, + "timestamp": "2025-09-10 02:44:48.759401", + "step": 3476, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:48.812837", + "step": 3476, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010269487276673317, + "timestamp": "2025-09-10 02:44:48.815566", + "step": 3477, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:48.872709", + "step": 3477, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000829762895591557, + "timestamp": "2025-09-10 02:44:48.874958", + "step": 3478, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:48.931600", + "step": 3478, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018849809421226382, + "timestamp": "2025-09-10 02:44:48.935036", + "step": 3479, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:48.992175", + "step": 3479, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003088541328907013, + "timestamp": "2025-09-10 02:44:48.997892", + "step": 3480, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:49.050406", + "step": 3480, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005800743238069117, + "timestamp": "2025-09-10 02:44:49.053368", + "step": 3481, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:44:49.114091", + "step": 3481, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01149090938270092, + "timestamp": "2025-09-10 02:44:49.124522", + "step": 3482, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:49.178394", + "step": 3482, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018863646546378732, + "timestamp": "2025-09-10 02:44:49.182068", + "step": 3483, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:49.236428", + "step": 3483, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007635973393917084, + "timestamp": "2025-09-10 02:44:49.242502", + "step": 3484, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:49.299186", + "step": 3484, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001805212115868926, + "timestamp": "2025-09-10 02:44:49.301698", + "step": 3485, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:44:49.360240", + "step": 3485, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000615614524576813, + "timestamp": "2025-09-10 02:44:49.370653", + "step": 3486, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:49.424694", + "step": 3486, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00028936678427271545, + "timestamp": "2025-09-10 02:44:49.427213", + "step": 3487, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:49.481065", + "step": 3487, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001837940071709454, + "timestamp": "2025-09-10 02:44:49.487841", + "step": 3488, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:49.540330", + "step": 3488, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010537052294239402, + "timestamp": "2025-09-10 02:44:49.542667", + "step": 3489, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:49.597692", + "step": 3489, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011933451518416405, + "timestamp": "2025-09-10 02:44:49.599957", + "step": 3490, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:49.653285", + "step": 3490, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005520405829884112, + "timestamp": "2025-09-10 02:44:49.655803", + "step": 3491, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:49.709211", + "step": 3491, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008819623850286007, + "timestamp": "2025-09-10 02:44:49.715476", + "step": 3492, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:49.768897", + "step": 3492, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011324294609948993, + "timestamp": "2025-09-10 02:44:49.771123", + "step": 3493, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:44:49.838908", + "step": 3493, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023660731967538595, + "timestamp": "2025-09-10 02:44:49.851160", + "step": 3494, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:49.904710", + "step": 3494, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003250342095270753, + "timestamp": "2025-09-10 02:44:49.906866", + "step": 3495, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:44:49.967618", + "step": 3495, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021458996925503016, + "timestamp": "2025-09-10 02:44:49.979073", + "step": 3496, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:44:50.032929", + "step": 3496, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007626359350979328, + "timestamp": "2025-09-10 02:44:50.035275", + "step": 3497, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:50.090242", + "step": 3497, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000988470739684999, + "timestamp": "2025-09-10 02:44:50.094040", + "step": 3498, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:44:50.148569", + "step": 3498, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017117972020059824, + "timestamp": "2025-09-10 02:44:50.156294", + "step": 3499, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:50.211487", + "step": 3499, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000862152490299195, + "timestamp": "2025-09-10 02:44:50.217483", + "step": 3500, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 3500", + "timestamp": "2025-09-10 02:44:50.787634", + "step": 3500, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:50.845482", + "step": 3500, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004377846140414476, + "timestamp": "2025-09-10 02:44:50.847830", + "step": 3501, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:50.902552", + "step": 3501, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002573088859207928, + "timestamp": "2025-09-10 02:44:50.906285", + "step": 3502, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:44:50.961306", + "step": 3502, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006194744491949677, + "timestamp": "2025-09-10 02:44:50.963941", + "step": 3503, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:51.017320", + "step": 3503, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004087744455318898, + "timestamp": "2025-09-10 02:44:51.023700", + "step": 3504, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:51.077018", + "step": 3504, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011638117954134941, + "timestamp": "2025-09-10 02:44:51.079324", + "step": 3505, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:51.132963", + "step": 3505, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03122425079345703, + "timestamp": "2025-09-10 02:44:51.140657", + "step": 3506, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:51.199885", + "step": 3506, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004915146855637431, + "timestamp": "2025-09-10 02:44:51.206998", + "step": 3507, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:51.261317", + "step": 3507, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00593670504167676, + "timestamp": "2025-09-10 02:44:51.267679", + "step": 3508, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:44:51.321084", + "step": 3508, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02698325738310814, + "timestamp": "2025-09-10 02:44:51.331046", + "step": 3509, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:51.385448", + "step": 3509, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024509276263415813, + "timestamp": "2025-09-10 02:44:51.387884", + "step": 3510, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:51.444406", + "step": 3510, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005396420019678771, + "timestamp": "2025-09-10 02:44:51.446557", + "step": 3511, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:51.500453", + "step": 3511, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002677281736396253, + "timestamp": "2025-09-10 02:44:51.507089", + "step": 3512, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:51.559675", + "step": 3512, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004991500172764063, + "timestamp": "2025-09-10 02:44:51.562439", + "step": 3513, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:51.615892", + "step": 3513, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012850449420511723, + "timestamp": "2025-09-10 02:44:51.618148", + "step": 3514, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:44:51.671017", + "step": 3514, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006575720617547631, + "timestamp": "2025-09-10 02:44:51.677304", + "step": 3515, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:51.731690", + "step": 3515, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003706204006448388, + "timestamp": "2025-09-10 02:44:51.737858", + "step": 3516, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:44:51.792789", + "step": 3516, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012624149210751057, + "timestamp": "2025-09-10 02:44:51.795155", + "step": 3517, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:51.853890", + "step": 3517, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006648301612585783, + "timestamp": "2025-09-10 02:44:51.860464", + "step": 3518, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:51.915028", + "step": 3518, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007761465385556221, + "timestamp": "2025-09-10 02:44:51.918558", + "step": 3519, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:51.973498", + "step": 3519, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003467746078968048, + "timestamp": "2025-09-10 02:44:51.979739", + "step": 3520, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:52.036546", + "step": 3520, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00293541862629354, + "timestamp": "2025-09-10 02:44:52.047368", + "step": 3521, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:52.104036", + "step": 3521, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005550007801502943, + "timestamp": "2025-09-10 02:44:52.115549", + "step": 3522, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:44:52.179522", + "step": 3522, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00021561107132583857, + "timestamp": "2025-09-10 02:44:52.184028", + "step": 3523, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:44:52.242056", + "step": 3523, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006978377350606024, + "timestamp": "2025-09-10 02:44:52.248473", + "step": 3524, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:44:52.303006", + "step": 3524, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013564244145527482, + "timestamp": "2025-09-10 02:44:52.309278", + "step": 3525, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:44:52.380588", + "step": 3525, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032836150377988815, + "timestamp": "2025-09-10 02:44:52.393113", + "step": 3526, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:44:52.449221", + "step": 3526, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002782290568575263, + "timestamp": "2025-09-10 02:44:52.451742", + "step": 3527, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:44:52.511967", + "step": 3527, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00013729430793318897, + "timestamp": "2025-09-10 02:44:52.518088", + "step": 3528, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:45:09.573242", + "step": 3528, + "epoch": 2 + }, + { + "type": "pplx", + "content": 24521115.23953575, + "timestamp": "2025-09-10 02:45:09.576298", + "step": 3528, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:09.630361", + "step": 3528, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00030652660643681884, + "timestamp": "2025-09-10 02:45:09.637749", + "step": 3529, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:09.691625", + "step": 3529, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004556176718324423, + "timestamp": "2025-09-10 02:45:09.699512", + "step": 3530, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:09.752614", + "step": 3530, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004627283196896315, + "timestamp": "2025-09-10 02:45:09.754627", + "step": 3531, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:45:09.823178", + "step": 3531, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03526419773697853, + "timestamp": "2025-09-10 02:45:09.836629", + "step": 3532, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:09.889713", + "step": 3532, + "epoch": 2 + }, + { + "type": "loss", + "content": 6.712974573019892e-05, + "timestamp": "2025-09-10 02:45:09.897880", + "step": 3533, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:09.953065", + "step": 3533, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004412380047142506, + "timestamp": "2025-09-10 02:45:09.962890", + "step": 3534, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:10.016148", + "step": 3534, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020845436956733465, + "timestamp": "2025-09-10 02:45:10.018263", + "step": 3535, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:10.071208", + "step": 3535, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018430438649374992, + "timestamp": "2025-09-10 02:45:10.077137", + "step": 3536, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:45:10.130207", + "step": 3536, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012220273725688457, + "timestamp": "2025-09-10 02:45:10.132348", + "step": 3537, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:10.184665", + "step": 3537, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004874151200056076, + "timestamp": "2025-09-10 02:45:10.186950", + "step": 3538, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:45:10.265697", + "step": 3538, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016656635561957955, + "timestamp": "2025-09-10 02:45:10.279786", + "step": 3539, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:10.333829", + "step": 3539, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016235330840572715, + "timestamp": "2025-09-10 02:45:10.342267", + "step": 3540, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:10.394760", + "step": 3540, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00011918076052097604, + "timestamp": "2025-09-10 02:45:10.397132", + "step": 3541, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:10.450126", + "step": 3541, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02568671479821205, + "timestamp": "2025-09-10 02:45:10.452372", + "step": 3542, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:10.506330", + "step": 3542, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002293122699484229, + "timestamp": "2025-09-10 02:45:10.508719", + "step": 3543, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:10.561548", + "step": 3543, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008784132078289986, + "timestamp": "2025-09-10 02:45:10.570604", + "step": 3544, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:45:10.623075", + "step": 3544, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007506661349907517, + "timestamp": "2025-09-10 02:45:10.625353", + "step": 3545, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:10.677990", + "step": 3545, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006393445190042257, + "timestamp": "2025-09-10 02:45:10.680135", + "step": 3546, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:10.733672", + "step": 3546, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021343769505620003, + "timestamp": "2025-09-10 02:45:10.735907", + "step": 3547, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:10.789439", + "step": 3547, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006848170887678862, + "timestamp": "2025-09-10 02:45:10.799871", + "step": 3548, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:10.852318", + "step": 3548, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00032945116981863976, + "timestamp": "2025-09-10 02:45:10.854549", + "step": 3549, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:45:10.912666", + "step": 3549, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009864980820566416, + "timestamp": "2025-09-10 02:45:10.923039", + "step": 3550, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:10.975456", + "step": 3550, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04976826161146164, + "timestamp": "2025-09-10 02:45:10.978569", + "step": 3551, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 624 + ], + "flops": 12480075828672.0 + }, + "timestamp": "2025-09-10 02:45:11.070594", + "step": 3551, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008849497884511948, + "timestamp": "2025-09-10 02:45:11.088764", + "step": 3552, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:11.142442", + "step": 3552, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006036148406565189, + "timestamp": "2025-09-10 02:45:11.150241", + "step": 3553, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:11.203588", + "step": 3553, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002904871478676796, + "timestamp": "2025-09-10 02:45:11.206597", + "step": 3554, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:45:11.273280", + "step": 3554, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0050770253874361515, + "timestamp": "2025-09-10 02:45:11.285550", + "step": 3555, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:11.339959", + "step": 3555, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019360597943887115, + "timestamp": "2025-09-10 02:45:11.346137", + "step": 3556, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:11.399311", + "step": 3556, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013183295726776123, + "timestamp": "2025-09-10 02:45:11.401534", + "step": 3557, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:45:11.460011", + "step": 3557, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019304242450743914, + "timestamp": "2025-09-10 02:45:11.470432", + "step": 3558, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:45:11.543646", + "step": 3558, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00984844658523798, + "timestamp": "2025-09-10 02:45:11.557090", + "step": 3559, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:45:11.618937", + "step": 3559, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002027039008680731, + "timestamp": "2025-09-10 02:45:11.630752", + "step": 3560, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:11.683828", + "step": 3560, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009412588551640511, + "timestamp": "2025-09-10 02:45:11.685867", + "step": 3561, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:11.739615", + "step": 3561, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010938641615211964, + "timestamp": "2025-09-10 02:45:11.749256", + "step": 3562, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:11.802379", + "step": 3562, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003617644775658846, + "timestamp": "2025-09-10 02:45:11.810498", + "step": 3563, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:11.863791", + "step": 3563, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002326021669432521, + "timestamp": "2025-09-10 02:45:11.872807", + "step": 3564, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:11.926073", + "step": 3564, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03195502609014511, + "timestamp": "2025-09-10 02:45:11.935961", + "step": 3565, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:11.989375", + "step": 3565, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029527253936976194, + "timestamp": "2025-09-10 02:45:11.992089", + "step": 3566, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:12.046463", + "step": 3566, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019422734156250954, + "timestamp": "2025-09-10 02:45:12.048667", + "step": 3567, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:12.101685", + "step": 3567, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007054646499454975, + "timestamp": "2025-09-10 02:45:12.108639", + "step": 3568, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:12.161690", + "step": 3568, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001168784569017589, + "timestamp": "2025-09-10 02:45:12.163834", + "step": 3569, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:12.217371", + "step": 3569, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00021309501607902348, + "timestamp": "2025-09-10 02:45:12.219932", + "step": 3570, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:12.273387", + "step": 3570, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017772700637578964, + "timestamp": "2025-09-10 02:45:12.275570", + "step": 3571, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:12.329048", + "step": 3571, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002913970500230789, + "timestamp": "2025-09-10 02:45:12.336302", + "step": 3572, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:12.389161", + "step": 3572, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015646052779629827, + "timestamp": "2025-09-10 02:45:12.395692", + "step": 3573, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:12.448434", + "step": 3573, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014464369043707848, + "timestamp": "2025-09-10 02:45:12.450580", + "step": 3574, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:12.504138", + "step": 3574, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01656530052423477, + "timestamp": "2025-09-10 02:45:12.506250", + "step": 3575, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:12.559547", + "step": 3575, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004240933805704117, + "timestamp": "2025-09-10 02:45:12.565601", + "step": 3576, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:12.618594", + "step": 3576, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010632125195115805, + "timestamp": "2025-09-10 02:45:12.621065", + "step": 3577, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:45:12.673376", + "step": 3577, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021287850104272366, + "timestamp": "2025-09-10 02:45:12.679163", + "step": 3578, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:12.732078", + "step": 3578, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008523418800905347, + "timestamp": "2025-09-10 02:45:12.739942", + "step": 3579, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:12.795580", + "step": 3579, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01762009598314762, + "timestamp": "2025-09-10 02:45:12.801404", + "step": 3580, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:12.854437", + "step": 3580, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06263189762830734, + "timestamp": "2025-09-10 02:45:12.864692", + "step": 3581, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:12.917980", + "step": 3581, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008568783523514867, + "timestamp": "2025-09-10 02:45:12.920336", + "step": 3582, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:12.974470", + "step": 3582, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018838342279195786, + "timestamp": "2025-09-10 02:45:12.984069", + "step": 3583, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:13.037941", + "step": 3583, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021039584651589394, + "timestamp": "2025-09-10 02:45:13.043950", + "step": 3584, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:13.096182", + "step": 3584, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011109011247754097, + "timestamp": "2025-09-10 02:45:13.098546", + "step": 3585, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:13.151599", + "step": 3585, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03372924029827118, + "timestamp": "2025-09-10 02:45:13.158151", + "step": 3586, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:13.211424", + "step": 3586, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005949471378698945, + "timestamp": "2025-09-10 02:45:13.217568", + "step": 3587, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:13.273695", + "step": 3587, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004848685581237078, + "timestamp": "2025-09-10 02:45:13.279554", + "step": 3588, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:13.339505", + "step": 3588, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00045782726374454796, + "timestamp": "2025-09-10 02:45:13.349798", + "step": 3589, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:13.402301", + "step": 3589, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029668339993804693, + "timestamp": "2025-09-10 02:45:13.406944", + "step": 3590, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:13.460620", + "step": 3590, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010383923538029194, + "timestamp": "2025-09-10 02:45:13.463180", + "step": 3591, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:13.515713", + "step": 3591, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03740331903100014, + "timestamp": "2025-09-10 02:45:13.521625", + "step": 3592, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:13.573808", + "step": 3592, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005096433451399207, + "timestamp": "2025-09-10 02:45:13.576009", + "step": 3593, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:13.628991", + "step": 3593, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017126796767115593, + "timestamp": "2025-09-10 02:45:13.635114", + "step": 3594, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:13.691161", + "step": 3594, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004927968140691519, + "timestamp": "2025-09-10 02:45:13.700817", + "step": 3595, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:13.753685", + "step": 3595, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008813808672130108, + "timestamp": "2025-09-10 02:45:13.762569", + "step": 3596, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:13.817946", + "step": 3596, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003934493288397789, + "timestamp": "2025-09-10 02:45:13.819994", + "step": 3597, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:13.872973", + "step": 3597, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004867472220212221, + "timestamp": "2025-09-10 02:45:13.876126", + "step": 3598, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:13.929551", + "step": 3598, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005675621214322746, + "timestamp": "2025-09-10 02:45:13.931672", + "step": 3599, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:13.984364", + "step": 3599, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002587629482150078, + "timestamp": "2025-09-10 02:45:13.990055", + "step": 3600, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:14.042114", + "step": 3600, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005651320680044591, + "timestamp": "2025-09-10 02:45:14.045181", + "step": 3601, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:14.097750", + "step": 3601, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04643111675977707, + "timestamp": "2025-09-10 02:45:14.099942", + "step": 3602, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:14.152929", + "step": 3602, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0174267441034317, + "timestamp": "2025-09-10 02:45:14.155320", + "step": 3603, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:14.208661", + "step": 3603, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028429338708519936, + "timestamp": "2025-09-10 02:45:14.214521", + "step": 3604, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:14.267199", + "step": 3604, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009339532698504627, + "timestamp": "2025-09-10 02:45:14.270166", + "step": 3605, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:14.323200", + "step": 3605, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03557131811976433, + "timestamp": "2025-09-10 02:45:14.325657", + "step": 3606, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:45:14.392043", + "step": 3606, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009677738882601261, + "timestamp": "2025-09-10 02:45:14.404269", + "step": 3607, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:14.457780", + "step": 3607, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006198307033628225, + "timestamp": "2025-09-10 02:45:14.464069", + "step": 3608, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:14.519005", + "step": 3608, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026686161290854216, + "timestamp": "2025-09-10 02:45:14.521278", + "step": 3609, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:14.575101", + "step": 3609, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034833138342946768, + "timestamp": "2025-09-10 02:45:14.584759", + "step": 3610, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:14.638442", + "step": 3610, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003976595588028431, + "timestamp": "2025-09-10 02:45:14.644863", + "step": 3611, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:14.698197", + "step": 3611, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007812803378328681, + "timestamp": "2025-09-10 02:45:14.704319", + "step": 3612, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:14.756140", + "step": 3612, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002657910343259573, + "timestamp": "2025-09-10 02:45:14.758589", + "step": 3613, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:14.811908", + "step": 3613, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002885453635826707, + "timestamp": "2025-09-10 02:45:14.814703", + "step": 3614, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:14.867594", + "step": 3614, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021188566461205482, + "timestamp": "2025-09-10 02:45:14.869947", + "step": 3615, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:14.923025", + "step": 3615, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017414916306734085, + "timestamp": "2025-09-10 02:45:14.929304", + "step": 3616, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:14.981346", + "step": 3616, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009675375185906887, + "timestamp": "2025-09-10 02:45:14.984522", + "step": 3617, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:45:15.042464", + "step": 3617, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009258276782929897, + "timestamp": "2025-09-10 02:45:15.052965", + "step": 3618, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:45:15.121195", + "step": 3618, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007249352987855673, + "timestamp": "2025-09-10 02:45:15.133793", + "step": 3619, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:15.187443", + "step": 3619, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013473917497321963, + "timestamp": "2025-09-10 02:45:15.195954", + "step": 3620, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:15.248699", + "step": 3620, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028069086838513613, + "timestamp": "2025-09-10 02:45:15.251023", + "step": 3621, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:15.303580", + "step": 3621, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00933842547237873, + "timestamp": "2025-09-10 02:45:15.306025", + "step": 3622, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:15.358644", + "step": 3622, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0156002938747406, + "timestamp": "2025-09-10 02:45:15.366824", + "step": 3623, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:15.421466", + "step": 3623, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035478200763463974, + "timestamp": "2025-09-10 02:45:15.432061", + "step": 3624, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:15.484344", + "step": 3624, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023858884815126657, + "timestamp": "2025-09-10 02:45:15.491088", + "step": 3625, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:15.545633", + "step": 3625, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016410350799560547, + "timestamp": "2025-09-10 02:45:15.555399", + "step": 3626, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:15.608929", + "step": 3626, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003903862088918686, + "timestamp": "2025-09-10 02:45:15.611167", + "step": 3627, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:15.663781", + "step": 3627, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014552460983395576, + "timestamp": "2025-09-10 02:45:15.672507", + "step": 3628, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:45:15.745971", + "step": 3628, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00588188087567687, + "timestamp": "2025-09-10 02:45:15.761358", + "step": 3629, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:45:15.827682", + "step": 3629, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006514329928904772, + "timestamp": "2025-09-10 02:45:15.839894", + "step": 3630, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:15.892939", + "step": 3630, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008370401337742805, + "timestamp": "2025-09-10 02:45:15.895136", + "step": 3631, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:15.948156", + "step": 3631, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004211624327581376, + "timestamp": "2025-09-10 02:45:15.954185", + "step": 3632, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:16.006366", + "step": 3632, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004571163561195135, + "timestamp": "2025-09-10 02:45:16.008761", + "step": 3633, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:16.061433", + "step": 3633, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010175072588026524, + "timestamp": "2025-09-10 02:45:16.063787", + "step": 3634, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:16.117367", + "step": 3634, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02154644951224327, + "timestamp": "2025-09-10 02:45:16.125217", + "step": 3635, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:16.178167", + "step": 3635, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037484881468117237, + "timestamp": "2025-09-10 02:45:16.184067", + "step": 3636, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:16.236185", + "step": 3636, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010430445894598961, + "timestamp": "2025-09-10 02:45:16.238484", + "step": 3637, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:45:16.290988", + "step": 3637, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004492948763072491, + "timestamp": "2025-09-10 02:45:16.293104", + "step": 3638, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:16.346793", + "step": 3638, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005959161091595888, + "timestamp": "2025-09-10 02:45:16.352937", + "step": 3639, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:45:16.413891", + "step": 3639, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005888893734663725, + "timestamp": "2025-09-10 02:45:16.425474", + "step": 3640, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:16.478219", + "step": 3640, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00891589093953371, + "timestamp": "2025-09-10 02:45:16.480442", + "step": 3641, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:16.533795", + "step": 3641, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01220616977661848, + "timestamp": "2025-09-10 02:45:16.536034", + "step": 3642, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:16.589644", + "step": 3642, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002008298644796014, + "timestamp": "2025-09-10 02:45:16.599238", + "step": 3643, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:16.654071", + "step": 3643, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019713356159627438, + "timestamp": "2025-09-10 02:45:16.664483", + "step": 3644, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:16.717903", + "step": 3644, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019930541515350342, + "timestamp": "2025-09-10 02:45:16.728366", + "step": 3645, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:45:16.786545", + "step": 3645, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006152032408863306, + "timestamp": "2025-09-10 02:45:16.796967", + "step": 3646, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:16.849773", + "step": 3646, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013385327532887459, + "timestamp": "2025-09-10 02:45:16.851880", + "step": 3647, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:16.904691", + "step": 3647, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009315438219346106, + "timestamp": "2025-09-10 02:45:16.910569", + "step": 3648, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:16.963162", + "step": 3648, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020077070221304893, + "timestamp": "2025-09-10 02:45:16.965506", + "step": 3649, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:17.018565", + "step": 3649, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00979316420853138, + "timestamp": "2025-09-10 02:45:17.024866", + "step": 3650, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:17.077952", + "step": 3650, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014990640338510275, + "timestamp": "2025-09-10 02:45:17.086139", + "step": 3651, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:45:17.139225", + "step": 3651, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008185057900846004, + "timestamp": "2025-09-10 02:45:17.145130", + "step": 3652, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:17.197044", + "step": 3652, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009565098211169243, + "timestamp": "2025-09-10 02:45:17.200214", + "step": 3653, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:45:17.260615", + "step": 3653, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018558982759714127, + "timestamp": "2025-09-10 02:45:17.271581", + "step": 3654, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:17.325300", + "step": 3654, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00753264594823122, + "timestamp": "2025-09-10 02:45:17.332886", + "step": 3655, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:17.385948", + "step": 3655, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008826500736176968, + "timestamp": "2025-09-10 02:45:17.391916", + "step": 3656, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:17.444259", + "step": 3656, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00726616894826293, + "timestamp": "2025-09-10 02:45:17.446446", + "step": 3657, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:17.499952", + "step": 3657, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024102849885821342, + "timestamp": "2025-09-10 02:45:17.509527", + "step": 3658, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:45:17.582428", + "step": 3658, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023528921883553267, + "timestamp": "2025-09-10 02:45:17.595887", + "step": 3659, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:17.648919", + "step": 3659, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003015550086274743, + "timestamp": "2025-09-10 02:45:17.654789", + "step": 3660, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:17.707230", + "step": 3660, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001021600211970508, + "timestamp": "2025-09-10 02:45:17.709620", + "step": 3661, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:45:17.778384", + "step": 3661, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009700418449938297, + "timestamp": "2025-09-10 02:45:17.790983", + "step": 3662, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:45:17.844412", + "step": 3662, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022200671955943108, + "timestamp": "2025-09-10 02:45:17.846898", + "step": 3663, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:17.900032", + "step": 3663, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033406191505491734, + "timestamp": "2025-09-10 02:45:17.907410", + "step": 3664, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:45:17.964074", + "step": 3664, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022803468629717827, + "timestamp": "2025-09-10 02:45:17.975292", + "step": 3665, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:18.028638", + "step": 3665, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02626602165400982, + "timestamp": "2025-09-10 02:45:18.031776", + "step": 3666, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:18.084942", + "step": 3666, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030524933245033026, + "timestamp": "2025-09-10 02:45:18.087389", + "step": 3667, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:18.140785", + "step": 3667, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0135736595839262, + "timestamp": "2025-09-10 02:45:18.146898", + "step": 3668, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:18.199669", + "step": 3668, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007685056421905756, + "timestamp": "2025-09-10 02:45:18.206054", + "step": 3669, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:18.258771", + "step": 3669, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008941399282775819, + "timestamp": "2025-09-10 02:45:18.266819", + "step": 3670, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:18.320651", + "step": 3670, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011505060829222202, + "timestamp": "2025-09-10 02:45:18.330266", + "step": 3671, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:18.382886", + "step": 3671, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012330555357038975, + "timestamp": "2025-09-10 02:45:18.389204", + "step": 3672, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:18.441719", + "step": 3672, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002355178352445364, + "timestamp": "2025-09-10 02:45:18.449872", + "step": 3673, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:45:18.511259", + "step": 3673, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00227890582755208, + "timestamp": "2025-09-10 02:45:18.522150", + "step": 3674, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:45:18.591381", + "step": 3674, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014533958164975047, + "timestamp": "2025-09-10 02:45:18.604101", + "step": 3675, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:45:35.711353", + "step": 3675, + "epoch": 2 + }, + { + "type": "pplx", + "content": 22935921.714725554, + "timestamp": "2025-09-10 02:45:35.714393", + "step": 3675, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:35.769472", + "step": 3675, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011157740838825703, + "timestamp": "2025-09-10 02:45:35.776224", + "step": 3676, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:35.829385", + "step": 3676, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00046526786172762513, + "timestamp": "2025-09-10 02:45:35.839682", + "step": 3677, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:35.902823", + "step": 3677, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038929337169975042, + "timestamp": "2025-09-10 02:45:35.905737", + "step": 3678, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:35.961464", + "step": 3678, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003663030220195651, + "timestamp": "2025-09-10 02:45:35.964070", + "step": 3679, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:36.018933", + "step": 3679, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011066590435802937, + "timestamp": "2025-09-10 02:45:36.029476", + "step": 3680, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:45:36.086316", + "step": 3680, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007109819445759058, + "timestamp": "2025-09-10 02:45:36.097557", + "step": 3681, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:36.156804", + "step": 3681, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00759152602404356, + "timestamp": "2025-09-10 02:45:36.166398", + "step": 3682, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:36.226098", + "step": 3682, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007620376418344676, + "timestamp": "2025-09-10 02:45:36.234200", + "step": 3683, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:45:36.298154", + "step": 3683, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012494848109781742, + "timestamp": "2025-09-10 02:45:36.309385", + "step": 3684, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:36.362293", + "step": 3684, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016487921820953488, + "timestamp": "2025-09-10 02:45:36.365100", + "step": 3685, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:36.420309", + "step": 3685, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020231228321790695, + "timestamp": "2025-09-10 02:45:36.422381", + "step": 3686, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:36.481834", + "step": 3686, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037350484635680914, + "timestamp": "2025-09-10 02:45:36.491667", + "step": 3687, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:45:36.554933", + "step": 3687, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01404801569879055, + "timestamp": "2025-09-10 02:45:36.566083", + "step": 3688, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:36.620397", + "step": 3688, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003894281107932329, + "timestamp": "2025-09-10 02:45:36.623177", + "step": 3689, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:36.677284", + "step": 3689, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01687435619533062, + "timestamp": "2025-09-10 02:45:36.686884", + "step": 3690, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:36.751189", + "step": 3690, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015043719904497266, + "timestamp": "2025-09-10 02:45:36.758861", + "step": 3691, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:45:36.819505", + "step": 3691, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008676859550178051, + "timestamp": "2025-09-10 02:45:36.831056", + "step": 3692, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:45:36.888265", + "step": 3692, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006987787783145905, + "timestamp": "2025-09-10 02:45:36.899461", + "step": 3693, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:36.953272", + "step": 3693, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005440797540359199, + "timestamp": "2025-09-10 02:45:36.955488", + "step": 3694, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:37.014134", + "step": 3694, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002616985933855176, + "timestamp": "2025-09-10 02:45:37.016654", + "step": 3695, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:37.070318", + "step": 3695, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009445978328585625, + "timestamp": "2025-09-10 02:45:37.079400", + "step": 3696, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:37.135162", + "step": 3696, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026948130689561367, + "timestamp": "2025-09-10 02:45:37.144957", + "step": 3697, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:37.202055", + "step": 3697, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00022185503621585667, + "timestamp": "2025-09-10 02:45:37.208582", + "step": 3698, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:37.266790", + "step": 3698, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001651530183153227, + "timestamp": "2025-09-10 02:45:37.273152", + "step": 3699, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:37.331222", + "step": 3699, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005250777117908001, + "timestamp": "2025-09-10 02:45:37.337387", + "step": 3700, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:37.389778", + "step": 3700, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019424583297222853, + "timestamp": "2025-09-10 02:45:37.399900", + "step": 3701, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:37.453969", + "step": 3701, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02517259307205677, + "timestamp": "2025-09-10 02:45:37.456175", + "step": 3702, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:37.508951", + "step": 3702, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001004486344754696, + "timestamp": "2025-09-10 02:45:37.517004", + "step": 3703, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:37.570698", + "step": 3703, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002498834510333836, + "timestamp": "2025-09-10 02:45:37.576629", + "step": 3704, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:37.633599", + "step": 3704, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00645799282938242, + "timestamp": "2025-09-10 02:45:37.637769", + "step": 3705, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:37.691615", + "step": 3705, + "epoch": 2 + }, + { + "type": "loss", + "content": 6.749193562427536e-05, + "timestamp": "2025-09-10 02:45:37.698338", + "step": 3706, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:37.751783", + "step": 3706, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017861025407910347, + "timestamp": "2025-09-10 02:45:37.754224", + "step": 3707, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:37.812588", + "step": 3707, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029708804562687874, + "timestamp": "2025-09-10 02:45:37.823148", + "step": 3708, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:45:37.882790", + "step": 3708, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005415987689048052, + "timestamp": "2025-09-10 02:45:37.894599", + "step": 3709, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:37.954552", + "step": 3709, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009763292619027197, + "timestamp": "2025-09-10 02:45:37.964355", + "step": 3710, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:38.019279", + "step": 3710, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001070567755959928, + "timestamp": "2025-09-10 02:45:38.025526", + "step": 3711, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:45:38.079475", + "step": 3711, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004329554736614227, + "timestamp": "2025-09-10 02:45:38.085389", + "step": 3712, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:38.141646", + "step": 3712, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05482960492372513, + "timestamp": "2025-09-10 02:45:38.144905", + "step": 3713, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:45:38.209777", + "step": 3713, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031996474135667086, + "timestamp": "2025-09-10 02:45:38.220655", + "step": 3714, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:38.295670", + "step": 3714, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00010142551036551595, + "timestamp": "2025-09-10 02:45:38.298686", + "step": 3715, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:38.358500", + "step": 3715, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022924281656742096, + "timestamp": "2025-09-10 02:45:38.371583", + "step": 3716, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:38.430257", + "step": 3716, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008062793058343232, + "timestamp": "2025-09-10 02:45:38.432882", + "step": 3717, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:38.485815", + "step": 3717, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030017667450010777, + "timestamp": "2025-09-10 02:45:38.492381", + "step": 3718, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:38.549621", + "step": 3718, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004409337881952524, + "timestamp": "2025-09-10 02:45:38.559434", + "step": 3719, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:45:38.624674", + "step": 3719, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022649625316262245, + "timestamp": "2025-09-10 02:45:38.636174", + "step": 3720, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:38.692446", + "step": 3720, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005659721209667623, + "timestamp": "2025-09-10 02:45:38.698629", + "step": 3721, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:45:38.760940", + "step": 3721, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003026895865332335, + "timestamp": "2025-09-10 02:45:38.771826", + "step": 3722, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:38.828489", + "step": 3722, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009049681015312672, + "timestamp": "2025-09-10 02:45:38.831283", + "step": 3723, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:38.884178", + "step": 3723, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02390514314174652, + "timestamp": "2025-09-10 02:45:38.889916", + "step": 3724, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:38.942174", + "step": 3724, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03299976512789726, + "timestamp": "2025-09-10 02:45:38.948707", + "step": 3725, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:39.001930", + "step": 3725, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008155884221196175, + "timestamp": "2025-09-10 02:45:39.009542", + "step": 3726, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:45:39.071247", + "step": 3726, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023253699764609337, + "timestamp": "2025-09-10 02:45:39.081672", + "step": 3727, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:39.134768", + "step": 3727, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03874696418642998, + "timestamp": "2025-09-10 02:45:39.142201", + "step": 3728, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:45:39.201860", + "step": 3728, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010049762204289436, + "timestamp": "2025-09-10 02:45:39.213133", + "step": 3729, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:39.266938", + "step": 3729, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000758556998334825, + "timestamp": "2025-09-10 02:45:39.269658", + "step": 3730, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:39.322436", + "step": 3730, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019734015688300133, + "timestamp": "2025-09-10 02:45:39.329067", + "step": 3731, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:39.385331", + "step": 3731, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005894810426980257, + "timestamp": "2025-09-10 02:45:39.391294", + "step": 3732, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:39.443673", + "step": 3732, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018528493819758296, + "timestamp": "2025-09-10 02:45:39.451913", + "step": 3733, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:45:39.505282", + "step": 3733, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003908081445842981, + "timestamp": "2025-09-10 02:45:39.507958", + "step": 3734, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:39.560945", + "step": 3734, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001145283313235268, + "timestamp": "2025-09-10 02:45:39.567493", + "step": 3735, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:45:39.623179", + "step": 3735, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006715327966958284, + "timestamp": "2025-09-10 02:45:39.628943", + "step": 3736, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 624 + ], + "flops": 12480075828672.0 + }, + "timestamp": "2025-09-10 02:45:39.718124", + "step": 3736, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009590490721166134, + "timestamp": "2025-09-10 02:45:39.737129", + "step": 3737, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:39.793605", + "step": 3737, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028196871280670166, + "timestamp": "2025-09-10 02:45:39.796820", + "step": 3738, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:39.854233", + "step": 3738, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003894695546478033, + "timestamp": "2025-09-10 02:45:39.856530", + "step": 3739, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:45:39.941299", + "step": 3739, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01211837213486433, + "timestamp": "2025-09-10 02:45:39.955798", + "step": 3740, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:40.009878", + "step": 3740, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007021053694188595, + "timestamp": "2025-09-10 02:45:40.020337", + "step": 3741, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:40.074695", + "step": 3741, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019118876662105322, + "timestamp": "2025-09-10 02:45:40.077108", + "step": 3742, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:40.130352", + "step": 3742, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005367000121623278, + "timestamp": "2025-09-10 02:45:40.132791", + "step": 3743, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:40.186009", + "step": 3743, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006436459254473448, + "timestamp": "2025-09-10 02:45:40.194895", + "step": 3744, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:40.247649", + "step": 3744, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009422756731510162, + "timestamp": "2025-09-10 02:45:40.249828", + "step": 3745, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:40.302501", + "step": 3745, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03521189093589783, + "timestamp": "2025-09-10 02:45:40.304861", + "step": 3746, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:40.358705", + "step": 3746, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012213027803227305, + "timestamp": "2025-09-10 02:45:40.361080", + "step": 3747, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:40.416309", + "step": 3747, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006766091100871563, + "timestamp": "2025-09-10 02:45:40.425851", + "step": 3748, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:45:40.478207", + "step": 3748, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010785062797367573, + "timestamp": "2025-09-10 02:45:40.480601", + "step": 3749, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:40.537037", + "step": 3749, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005762494169175625, + "timestamp": "2025-09-10 02:45:40.539487", + "step": 3750, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:40.594461", + "step": 3750, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008459271863102913, + "timestamp": "2025-09-10 02:45:40.604240", + "step": 3751, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:45:40.679627", + "step": 3751, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013220703694969416, + "timestamp": "2025-09-10 02:45:40.692961", + "step": 3752, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:40.745888", + "step": 3752, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007617729250341654, + "timestamp": "2025-09-10 02:45:40.752343", + "step": 3753, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:40.805366", + "step": 3753, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00389980711042881, + "timestamp": "2025-09-10 02:45:40.810834", + "step": 3754, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:40.865568", + "step": 3754, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0048908935859799385, + "timestamp": "2025-09-10 02:45:40.868981", + "step": 3755, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:40.928441", + "step": 3755, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001440156251192093, + "timestamp": "2025-09-10 02:45:40.935601", + "step": 3756, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:45:40.993390", + "step": 3756, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020689917728304863, + "timestamp": "2025-09-10 02:45:40.995500", + "step": 3757, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:41.053234", + "step": 3757, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023082138504832983, + "timestamp": "2025-09-10 02:45:41.055476", + "step": 3758, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:41.107930", + "step": 3758, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004035149235278368, + "timestamp": "2025-09-10 02:45:41.111325", + "step": 3759, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:41.165716", + "step": 3759, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02253444492816925, + "timestamp": "2025-09-10 02:45:41.173085", + "step": 3760, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:41.227636", + "step": 3760, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03691099211573601, + "timestamp": "2025-09-10 02:45:41.230024", + "step": 3761, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 12160073886080.0 + }, + "timestamp": "2025-09-10 02:45:41.320609", + "step": 3761, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024642014876008034, + "timestamp": "2025-09-10 02:45:41.337711", + "step": 3762, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:41.395575", + "step": 3762, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014374173479154706, + "timestamp": "2025-09-10 02:45:41.398834", + "step": 3763, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:41.452457", + "step": 3763, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005778650287538767, + "timestamp": "2025-09-10 02:45:41.458445", + "step": 3764, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:41.511238", + "step": 3764, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020793273579329252, + "timestamp": "2025-09-10 02:45:41.513617", + "step": 3765, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:41.566775", + "step": 3765, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01677837036550045, + "timestamp": "2025-09-10 02:45:41.571323", + "step": 3766, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:41.624563", + "step": 3766, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007229361217468977, + "timestamp": "2025-09-10 02:45:41.628254", + "step": 3767, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:41.682766", + "step": 3767, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0087441960349679, + "timestamp": "2025-09-10 02:45:41.688707", + "step": 3768, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:41.740627", + "step": 3768, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04530872777104378, + "timestamp": "2025-09-10 02:45:41.742942", + "step": 3769, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:41.799060", + "step": 3769, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005834842566400766, + "timestamp": "2025-09-10 02:45:41.803187", + "step": 3770, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:41.858344", + "step": 3770, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011207705363631248, + "timestamp": "2025-09-10 02:45:41.861883", + "step": 3771, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:41.919413", + "step": 3771, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003581272903829813, + "timestamp": "2025-09-10 02:45:41.925437", + "step": 3772, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:41.979713", + "step": 3772, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015710501465946436, + "timestamp": "2025-09-10 02:45:41.989629", + "step": 3773, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:42.047535", + "step": 3773, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007571594323962927, + "timestamp": "2025-09-10 02:45:42.055931", + "step": 3774, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:42.115257", + "step": 3774, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03263309970498085, + "timestamp": "2025-09-10 02:45:42.126131", + "step": 3775, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:42.185795", + "step": 3775, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003294537076726556, + "timestamp": "2025-09-10 02:45:42.199155", + "step": 3776, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:42.263580", + "step": 3776, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017571768257766962, + "timestamp": "2025-09-10 02:45:42.266076", + "step": 3777, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:42.319304", + "step": 3777, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004685258027166128, + "timestamp": "2025-09-10 02:45:42.322399", + "step": 3778, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:42.377471", + "step": 3778, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012189905159175396, + "timestamp": "2025-09-10 02:45:42.386406", + "step": 3779, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:42.442789", + "step": 3779, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0740482434630394, + "timestamp": "2025-09-10 02:45:42.448795", + "step": 3780, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:42.501518", + "step": 3780, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01791914366185665, + "timestamp": "2025-09-10 02:45:42.504315", + "step": 3781, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:42.558103", + "step": 3781, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00022058424656279385, + "timestamp": "2025-09-10 02:45:42.560302", + "step": 3782, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:42.617485", + "step": 3782, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028940639458596706, + "timestamp": "2025-09-10 02:45:42.619812", + "step": 3783, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:45:42.676106", + "step": 3783, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016884662210941315, + "timestamp": "2025-09-10 02:45:42.686469", + "step": 3784, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:42.739204", + "step": 3784, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005923439748585224, + "timestamp": "2025-09-10 02:45:42.744920", + "step": 3785, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:42.802424", + "step": 3785, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03181489184498787, + "timestamp": "2025-09-10 02:45:42.804669", + "step": 3786, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:42.862624", + "step": 3786, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002827305346727371, + "timestamp": "2025-09-10 02:45:42.864891", + "step": 3787, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 13120079713856.0 + }, + "timestamp": "2025-09-10 02:45:42.961809", + "step": 3787, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00886689592152834, + "timestamp": "2025-09-10 02:45:42.981137", + "step": 3788, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:43.033704", + "step": 3788, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026850944384932518, + "timestamp": "2025-09-10 02:45:43.035944", + "step": 3789, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:43.088915", + "step": 3789, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030168982222676277, + "timestamp": "2025-09-10 02:45:43.092538", + "step": 3790, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:43.146983", + "step": 3790, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013773827813565731, + "timestamp": "2025-09-10 02:45:43.149626", + "step": 3791, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:43.211897", + "step": 3791, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002505003707483411, + "timestamp": "2025-09-10 02:45:43.220942", + "step": 3792, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:43.274547", + "step": 3792, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002235317835584283, + "timestamp": "2025-09-10 02:45:43.282519", + "step": 3793, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:43.338820", + "step": 3793, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030649728141725063, + "timestamp": "2025-09-10 02:45:43.341281", + "step": 3794, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:45:43.394450", + "step": 3794, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008617487736046314, + "timestamp": "2025-09-10 02:45:43.398299", + "step": 3795, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:43.456881", + "step": 3795, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003183516440913081, + "timestamp": "2025-09-10 02:45:43.462835", + "step": 3796, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:43.515887", + "step": 3796, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007166016381233931, + "timestamp": "2025-09-10 02:45:43.522479", + "step": 3797, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:43.575669", + "step": 3797, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0042429291643202305, + "timestamp": "2025-09-10 02:45:43.584056", + "step": 3798, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:43.636785", + "step": 3798, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010450095869600773, + "timestamp": "2025-09-10 02:45:43.641356", + "step": 3799, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:43.694493", + "step": 3799, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010051152668893337, + "timestamp": "2025-09-10 02:45:43.700418", + "step": 3800, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:43.752551", + "step": 3800, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0075791762210428715, + "timestamp": "2025-09-10 02:45:43.757144", + "step": 3801, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:43.813354", + "step": 3801, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007832504925318062, + "timestamp": "2025-09-10 02:45:43.815666", + "step": 3802, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:43.869711", + "step": 3802, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003694644197821617, + "timestamp": "2025-09-10 02:45:43.876272", + "step": 3803, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:43.930776", + "step": 3803, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006956086959689856, + "timestamp": "2025-09-10 02:45:43.938163", + "step": 3804, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:45:44.004949", + "step": 3804, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00568874878808856, + "timestamp": "2025-09-10 02:45:44.018605", + "step": 3805, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:44.075024", + "step": 3805, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010045773349702358, + "timestamp": "2025-09-10 02:45:44.084818", + "step": 3806, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:44.138394", + "step": 3806, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015708133578300476, + "timestamp": "2025-09-10 02:45:44.144663", + "step": 3807, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:45:44.199390", + "step": 3807, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002105496358126402, + "timestamp": "2025-09-10 02:45:44.208624", + "step": 3808, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:45:44.267140", + "step": 3808, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014348874799907207, + "timestamp": "2025-09-10 02:45:44.273795", + "step": 3809, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:44.328573", + "step": 3809, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009345331229269505, + "timestamp": "2025-09-10 02:45:44.331411", + "step": 3810, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:45:44.392589", + "step": 3810, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009542659856379032, + "timestamp": "2025-09-10 02:45:44.402352", + "step": 3811, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:44.455971", + "step": 3811, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021965688094496727, + "timestamp": "2025-09-10 02:45:44.467161", + "step": 3812, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:44.520940", + "step": 3812, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003360508708283305, + "timestamp": "2025-09-10 02:45:44.523979", + "step": 3813, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:44.578882", + "step": 3813, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008107351139187813, + "timestamp": "2025-09-10 02:45:44.587147", + "step": 3814, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:44.645119", + "step": 3814, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003683290909975767, + "timestamp": "2025-09-10 02:45:44.650551", + "step": 3815, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:45:44.707201", + "step": 3815, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004462660755962133, + "timestamp": "2025-09-10 02:45:44.713191", + "step": 3816, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:45:44.767085", + "step": 3816, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01923714391887188, + "timestamp": "2025-09-10 02:45:44.769906", + "step": 3817, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:45:44.827118", + "step": 3817, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022326426114887, + "timestamp": "2025-09-10 02:45:44.830039", + "step": 3818, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:45:44.887663", + "step": 3818, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02179141901433468, + "timestamp": "2025-09-10 02:45:44.890588", + "step": 3819, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:45:44.943519", + "step": 3819, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01936708576977253, + "timestamp": "2025-09-10 02:45:44.952636", + "step": 3820, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:45.006058", + "step": 3820, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002558376407250762, + "timestamp": "2025-09-10 02:45:45.008156", + "step": 3821, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:45:45.065202", + "step": 3821, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026886161416769028, + "timestamp": "2025-09-10 02:45:45.067497", + "step": 3822, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:46:01.849359", + "step": 3822, + "epoch": 2 + }, + { + "type": "pplx", + "content": 25595522.624360383, + "timestamp": "2025-09-10 02:46:01.852615", + "step": 3822, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:46:01.925157", + "step": 3822, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02511192485690117, + "timestamp": "2025-09-10 02:46:01.938790", + "step": 3823, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:46:02.005886", + "step": 3823, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015108300372958183, + "timestamp": "2025-09-10 02:46:02.018868", + "step": 3824, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:02.072955", + "step": 3824, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001703347428701818, + "timestamp": "2025-09-10 02:46:02.075359", + "step": 3825, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:02.130021", + "step": 3825, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0062110270373523235, + "timestamp": "2025-09-10 02:46:02.139816", + "step": 3826, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:46:02.213436", + "step": 3826, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014068561606109142, + "timestamp": "2025-09-10 02:46:02.226946", + "step": 3827, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:02.290791", + "step": 3827, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005498811602592468, + "timestamp": "2025-09-10 02:46:02.301553", + "step": 3828, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 8960054460160.0 + }, + "timestamp": "2025-09-10 02:46:02.372910", + "step": 3828, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011925933882594109, + "timestamp": "2025-09-10 02:46:02.386954", + "step": 3829, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:02.449805", + "step": 3829, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025502387434244156, + "timestamp": "2025-09-10 02:46:02.455526", + "step": 3830, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:02.523035", + "step": 3830, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016895176842808723, + "timestamp": "2025-09-10 02:46:02.538564", + "step": 3831, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:02.608071", + "step": 3831, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00885614100843668, + "timestamp": "2025-09-10 02:46:02.621006", + "step": 3832, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:02.678964", + "step": 3832, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010726043954491615, + "timestamp": "2025-09-10 02:46:02.688897", + "step": 3833, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:02.745334", + "step": 3833, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021753041073679924, + "timestamp": "2025-09-10 02:46:02.750667", + "step": 3834, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:02.811040", + "step": 3834, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009483088739216328, + "timestamp": "2025-09-10 02:46:02.820620", + "step": 3835, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:02.885099", + "step": 3835, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008262093178927898, + "timestamp": "2025-09-10 02:46:02.896308", + "step": 3836, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:02.960650", + "step": 3836, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010429082904011011, + "timestamp": "2025-09-10 02:46:02.970913", + "step": 3837, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:03.029441", + "step": 3837, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005868071224540472, + "timestamp": "2025-09-10 02:46:03.037853", + "step": 3838, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:03.098177", + "step": 3838, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011152158258482814, + "timestamp": "2025-09-10 02:46:03.106168", + "step": 3839, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:03.161348", + "step": 3839, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000761518080253154, + "timestamp": "2025-09-10 02:46:03.171952", + "step": 3840, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:03.225150", + "step": 3840, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010029973927885294, + "timestamp": "2025-09-10 02:46:03.227331", + "step": 3841, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:03.280202", + "step": 3841, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00562081765383482, + "timestamp": "2025-09-10 02:46:03.282605", + "step": 3842, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:03.335065", + "step": 3842, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008440883830189705, + "timestamp": "2025-09-10 02:46:03.337208", + "step": 3843, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:03.389815", + "step": 3843, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001560679986141622, + "timestamp": "2025-09-10 02:46:03.396030", + "step": 3844, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:03.448404", + "step": 3844, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00929208192974329, + "timestamp": "2025-09-10 02:46:03.450887", + "step": 3845, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:03.503780", + "step": 3845, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032838734332472086, + "timestamp": "2025-09-10 02:46:03.506230", + "step": 3846, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:46:03.568100", + "step": 3846, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028587591368705034, + "timestamp": "2025-09-10 02:46:03.579207", + "step": 3847, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:46:03.637153", + "step": 3847, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010549718514084816, + "timestamp": "2025-09-10 02:46:03.643160", + "step": 3848, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:03.694868", + "step": 3848, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005692584905773401, + "timestamp": "2025-09-10 02:46:03.697087", + "step": 3849, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:03.749780", + "step": 3849, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006631883326917887, + "timestamp": "2025-09-10 02:46:03.756406", + "step": 3850, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:03.809533", + "step": 3850, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011357649229466915, + "timestamp": "2025-09-10 02:46:03.812496", + "step": 3851, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:03.865762", + "step": 3851, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023502331227064133, + "timestamp": "2025-09-10 02:46:03.871762", + "step": 3852, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:03.923680", + "step": 3852, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003774035722017288, + "timestamp": "2025-09-10 02:46:03.930325", + "step": 3853, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:03.982958", + "step": 3853, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032003470696508884, + "timestamp": "2025-09-10 02:46:03.991152", + "step": 3854, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:04.044554", + "step": 3854, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016245054081082344, + "timestamp": "2025-09-10 02:46:04.046844", + "step": 3855, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:04.098803", + "step": 3855, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008556434186175466, + "timestamp": "2025-09-10 02:46:04.104562", + "step": 3856, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:04.157094", + "step": 3856, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00436136731877923, + "timestamp": "2025-09-10 02:46:04.159454", + "step": 3857, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:04.211886", + "step": 3857, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005428866017609835, + "timestamp": "2025-09-10 02:46:04.214132", + "step": 3858, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:04.267266", + "step": 3858, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02254888415336609, + "timestamp": "2025-09-10 02:46:04.273744", + "step": 3859, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:04.327066", + "step": 3859, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004335789475589991, + "timestamp": "2025-09-10 02:46:04.332997", + "step": 3860, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:46:04.404872", + "step": 3860, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001393162994645536, + "timestamp": "2025-09-10 02:46:04.419817", + "step": 3861, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:04.472889", + "step": 3861, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015901681035757065, + "timestamp": "2025-09-10 02:46:04.475251", + "step": 3862, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:04.528536", + "step": 3862, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010006852447986603, + "timestamp": "2025-09-10 02:46:04.530781", + "step": 3863, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:04.583072", + "step": 3863, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011894852854311466, + "timestamp": "2025-09-10 02:46:04.589042", + "step": 3864, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:46:04.655893", + "step": 3864, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011220744345337152, + "timestamp": "2025-09-10 02:46:04.669657", + "step": 3865, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:04.722855", + "step": 3865, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007972274906933308, + "timestamp": "2025-09-10 02:46:04.730989", + "step": 3866, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:04.784136", + "step": 3866, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008162098936736584, + "timestamp": "2025-09-10 02:46:04.787043", + "step": 3867, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:04.840433", + "step": 3867, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030370864551514387, + "timestamp": "2025-09-10 02:46:04.846455", + "step": 3868, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:04.899994", + "step": 3868, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00035197677789255977, + "timestamp": "2025-09-10 02:46:04.902163", + "step": 3869, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:04.956160", + "step": 3869, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003160616382956505, + "timestamp": "2025-09-10 02:46:04.958647", + "step": 3870, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:05.012049", + "step": 3870, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00044348399387672544, + "timestamp": "2025-09-10 02:46:05.014302", + "step": 3871, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:05.067991", + "step": 3871, + "epoch": 2 + }, + { + "type": "loss", + "content": 5.3746625781059265e-05, + "timestamp": "2025-09-10 02:46:05.074048", + "step": 3872, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:05.126657", + "step": 3872, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009846360422670841, + "timestamp": "2025-09-10 02:46:05.136644", + "step": 3873, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:05.190796", + "step": 3873, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004170811735093594, + "timestamp": "2025-09-10 02:46:05.192983", + "step": 3874, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:05.246252", + "step": 3874, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00024055906396824867, + "timestamp": "2025-09-10 02:46:05.252757", + "step": 3875, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:05.311281", + "step": 3875, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010387812741100788, + "timestamp": "2025-09-10 02:46:05.321914", + "step": 3876, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:05.374218", + "step": 3876, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006725011044181883, + "timestamp": "2025-09-10 02:46:05.376571", + "step": 3877, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:05.429297", + "step": 3877, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012642050860449672, + "timestamp": "2025-09-10 02:46:05.432504", + "step": 3878, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:05.485635", + "step": 3878, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003001450328156352, + "timestamp": "2025-09-10 02:46:05.487924", + "step": 3879, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:05.540771", + "step": 3879, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013148859143257141, + "timestamp": "2025-09-10 02:46:05.546789", + "step": 3880, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:05.599418", + "step": 3880, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005107710021547973, + "timestamp": "2025-09-10 02:46:05.601789", + "step": 3881, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:05.654552", + "step": 3881, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003785243956372142, + "timestamp": "2025-09-10 02:46:05.657528", + "step": 3882, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:46:05.710094", + "step": 3882, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00025510616251267493, + "timestamp": "2025-09-10 02:46:05.712386", + "step": 3883, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:46:05.780521", + "step": 3883, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003945929929614067, + "timestamp": "2025-09-10 02:46:05.793883", + "step": 3884, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:05.846887", + "step": 3884, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015666695544496179, + "timestamp": "2025-09-10 02:46:05.857222", + "step": 3885, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:05.910584", + "step": 3885, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00791245698928833, + "timestamp": "2025-09-10 02:46:05.913000", + "step": 3886, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:05.966030", + "step": 3886, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002426649909466505, + "timestamp": "2025-09-10 02:46:05.968352", + "step": 3887, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:06.021814", + "step": 3887, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004155515693128109, + "timestamp": "2025-09-10 02:46:06.030641", + "step": 3888, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:06.083987", + "step": 3888, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01504812203347683, + "timestamp": "2025-09-10 02:46:06.086431", + "step": 3889, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:46:06.159860", + "step": 3889, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009594624862074852, + "timestamp": "2025-09-10 02:46:06.173568", + "step": 3890, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:06.227312", + "step": 3890, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032929691951721907, + "timestamp": "2025-09-10 02:46:06.233415", + "step": 3891, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:06.286848", + "step": 3891, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026436985936015844, + "timestamp": "2025-09-10 02:46:06.292967", + "step": 3892, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:06.345790", + "step": 3892, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03356518596410751, + "timestamp": "2025-09-10 02:46:06.348113", + "step": 3893, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:06.401014", + "step": 3893, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003165427187923342, + "timestamp": "2025-09-10 02:46:06.403205", + "step": 3894, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:06.456127", + "step": 3894, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013966757105663419, + "timestamp": "2025-09-10 02:46:06.458619", + "step": 3895, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:06.511417", + "step": 3895, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018488811329007149, + "timestamp": "2025-09-10 02:46:06.518867", + "step": 3896, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:06.572627", + "step": 3896, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00148906244430691, + "timestamp": "2025-09-10 02:46:06.575617", + "step": 3897, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:06.630487", + "step": 3897, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01566730998456478, + "timestamp": "2025-09-10 02:46:06.634108", + "step": 3898, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:06.689415", + "step": 3898, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008929139003157616, + "timestamp": "2025-09-10 02:46:06.697645", + "step": 3899, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:06.754706", + "step": 3899, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00037877244176343083, + "timestamp": "2025-09-10 02:46:06.760997", + "step": 3900, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:06.813673", + "step": 3900, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00015884646563790739, + "timestamp": "2025-09-10 02:46:06.820306", + "step": 3901, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:46:06.882060", + "step": 3901, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00897427648305893, + "timestamp": "2025-09-10 02:46:06.892777", + "step": 3902, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:06.950779", + "step": 3902, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014491062611341476, + "timestamp": "2025-09-10 02:46:06.953778", + "step": 3903, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:07.008465", + "step": 3903, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001243895385414362, + "timestamp": "2025-09-10 02:46:07.014663", + "step": 3904, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:07.067582", + "step": 3904, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001384457340463996, + "timestamp": "2025-09-10 02:46:07.074292", + "step": 3905, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:07.135019", + "step": 3905, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008130766800604761, + "timestamp": "2025-09-10 02:46:07.137902", + "step": 3906, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:07.196259", + "step": 3906, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007746769115328789, + "timestamp": "2025-09-10 02:46:07.199364", + "step": 3907, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:07.253549", + "step": 3907, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008873450569808483, + "timestamp": "2025-09-10 02:46:07.262485", + "step": 3908, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:07.318967", + "step": 3908, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002876777434721589, + "timestamp": "2025-09-10 02:46:07.321661", + "step": 3909, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:07.377624", + "step": 3909, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03569520264863968, + "timestamp": "2025-09-10 02:46:07.380450", + "step": 3910, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:07.435323", + "step": 3910, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016105091199278831, + "timestamp": "2025-09-10 02:46:07.438403", + "step": 3911, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:07.496230", + "step": 3911, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023835187312215567, + "timestamp": "2025-09-10 02:46:07.502827", + "step": 3912, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:07.557289", + "step": 3912, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006838030181825161, + "timestamp": "2025-09-10 02:46:07.567784", + "step": 3913, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:07.622185", + "step": 3913, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01887880079448223, + "timestamp": "2025-09-10 02:46:07.624804", + "step": 3914, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:07.679652", + "step": 3914, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010709211230278015, + "timestamp": "2025-09-10 02:46:07.682351", + "step": 3915, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:07.735658", + "step": 3915, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03275291249155998, + "timestamp": "2025-09-10 02:46:07.742387", + "step": 3916, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:07.799570", + "step": 3916, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008088816539384425, + "timestamp": "2025-09-10 02:46:07.801907", + "step": 3917, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:07.856492", + "step": 3917, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006239629001356661, + "timestamp": "2025-09-10 02:46:07.866086", + "step": 3918, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:07.921749", + "step": 3918, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00020132127974648029, + "timestamp": "2025-09-10 02:46:07.931504", + "step": 3919, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:07.988521", + "step": 3919, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002257634187117219, + "timestamp": "2025-09-10 02:46:07.995272", + "step": 3920, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:08.049662", + "step": 3920, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016731226816773415, + "timestamp": "2025-09-10 02:46:08.054198", + "step": 3921, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:08.111044", + "step": 3921, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003627836413215846, + "timestamp": "2025-09-10 02:46:08.114466", + "step": 3922, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:08.167932", + "step": 3922, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004804384894669056, + "timestamp": "2025-09-10 02:46:08.170479", + "step": 3923, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:08.226652", + "step": 3923, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010894840583205223, + "timestamp": "2025-09-10 02:46:08.235706", + "step": 3924, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:08.294378", + "step": 3924, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04266980290412903, + "timestamp": "2025-09-10 02:46:08.298076", + "step": 3925, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:46:08.358574", + "step": 3925, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008919899119064212, + "timestamp": "2025-09-10 02:46:08.369025", + "step": 3926, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:08.423061", + "step": 3926, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006377916433848441, + "timestamp": "2025-09-10 02:46:08.425213", + "step": 3927, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:08.478773", + "step": 3927, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008751966990530491, + "timestamp": "2025-09-10 02:46:08.484776", + "step": 3928, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:46:08.543298", + "step": 3928, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013560999650508165, + "timestamp": "2025-09-10 02:46:08.554885", + "step": 3929, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:08.608429", + "step": 3929, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005471502896398306, + "timestamp": "2025-09-10 02:46:08.618058", + "step": 3930, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:08.671270", + "step": 3930, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013103344244882464, + "timestamp": "2025-09-10 02:46:08.673895", + "step": 3931, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:08.726901", + "step": 3931, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003612770524341613, + "timestamp": "2025-09-10 02:46:08.733063", + "step": 3932, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:08.786058", + "step": 3932, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000807850738056004, + "timestamp": "2025-09-10 02:46:08.788140", + "step": 3933, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:08.841313", + "step": 3933, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022914016619324684, + "timestamp": "2025-09-10 02:46:08.843692", + "step": 3934, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:08.897066", + "step": 3934, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017113996436819434, + "timestamp": "2025-09-10 02:46:08.899401", + "step": 3935, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:46:08.951954", + "step": 3935, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004442400822881609, + "timestamp": "2025-09-10 02:46:08.958020", + "step": 3936, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:09.010532", + "step": 3936, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005631075706332922, + "timestamp": "2025-09-10 02:46:09.012831", + "step": 3937, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:09.065523", + "step": 3937, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012641689740121365, + "timestamp": "2025-09-10 02:46:09.067892", + "step": 3938, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:09.120651", + "step": 3938, + "epoch": 2 + }, + { + "type": "loss", + "content": 3.472653406788595e-05, + "timestamp": "2025-09-10 02:46:09.122780", + "step": 3939, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:09.176334", + "step": 3939, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003270581131801009, + "timestamp": "2025-09-10 02:46:09.186744", + "step": 3940, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:09.239169", + "step": 3940, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017104193102568388, + "timestamp": "2025-09-10 02:46:09.241235", + "step": 3941, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:09.293931", + "step": 3941, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012531804852187634, + "timestamp": "2025-09-10 02:46:09.302176", + "step": 3942, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:09.356597", + "step": 3942, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021646064706146717, + "timestamp": "2025-09-10 02:46:09.366332", + "step": 3943, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:09.419071", + "step": 3943, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002670682442840189, + "timestamp": "2025-09-10 02:46:09.424801", + "step": 3944, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:09.477180", + "step": 3944, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020014271140098572, + "timestamp": "2025-09-10 02:46:09.483903", + "step": 3945, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:09.537785", + "step": 3945, + "epoch": 2 + }, + { + "type": "loss", + "content": 2.7008974939235486e-05, + "timestamp": "2025-09-10 02:46:09.540215", + "step": 3946, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:09.593962", + "step": 3946, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025508219841867685, + "timestamp": "2025-09-10 02:46:09.601521", + "step": 3947, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:09.655467", + "step": 3947, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004302311688661575, + "timestamp": "2025-09-10 02:46:09.662526", + "step": 3948, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:09.715074", + "step": 3948, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012611711397767067, + "timestamp": "2025-09-10 02:46:09.717917", + "step": 3949, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:46:09.770226", + "step": 3949, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00032069970620796084, + "timestamp": "2025-09-10 02:46:09.772337", + "step": 3950, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:09.825996", + "step": 3950, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009283372201025486, + "timestamp": "2025-09-10 02:46:09.835625", + "step": 3951, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:09.888586", + "step": 3951, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019986514002084732, + "timestamp": "2025-09-10 02:46:09.894271", + "step": 3952, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:09.946680", + "step": 3952, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010985749773681164, + "timestamp": "2025-09-10 02:46:09.948734", + "step": 3953, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:10.001657", + "step": 3953, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00025457629817537963, + "timestamp": "2025-09-10 02:46:10.008320", + "step": 3954, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:10.061189", + "step": 3954, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00038253565435297787, + "timestamp": "2025-09-10 02:46:10.067859", + "step": 3955, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:10.121046", + "step": 3955, + "epoch": 2 + }, + { + "type": "loss", + "content": 9.885052713798359e-05, + "timestamp": "2025-09-10 02:46:10.126718", + "step": 3956, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:46:10.182870", + "step": 3956, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003262778918724507, + "timestamp": "2025-09-10 02:46:10.194036", + "step": 3957, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:10.247119", + "step": 3957, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.039545975625514984, + "timestamp": "2025-09-10 02:46:10.249114", + "step": 3958, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:10.302706", + "step": 3958, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.037021856755018234, + "timestamp": "2025-09-10 02:46:10.312005", + "step": 3959, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:10.365560", + "step": 3959, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010134039912372828, + "timestamp": "2025-09-10 02:46:10.371700", + "step": 3960, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:10.425243", + "step": 3960, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001198066514916718, + "timestamp": "2025-09-10 02:46:10.435761", + "step": 3961, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:10.488616", + "step": 3961, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00010575448686722666, + "timestamp": "2025-09-10 02:46:10.490784", + "step": 3962, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:10.543843", + "step": 3962, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008649599039927125, + "timestamp": "2025-09-10 02:46:10.545896", + "step": 3963, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:10.598553", + "step": 3963, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014276156201958656, + "timestamp": "2025-09-10 02:46:10.604380", + "step": 3964, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:10.656623", + "step": 3964, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019015094731003046, + "timestamp": "2025-09-10 02:46:10.658607", + "step": 3965, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:10.710982", + "step": 3965, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028332578018307686, + "timestamp": "2025-09-10 02:46:10.713059", + "step": 3966, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:10.765377", + "step": 3966, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004202342126518488, + "timestamp": "2025-09-10 02:46:10.767530", + "step": 3967, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:10.820455", + "step": 3967, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0185786634683609, + "timestamp": "2025-09-10 02:46:10.826149", + "step": 3968, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:10.878803", + "step": 3968, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002318680752068758, + "timestamp": "2025-09-10 02:46:10.880842", + "step": 3969, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:46:27.780620", + "step": 3969, + "epoch": 2 + }, + { + "type": "pplx", + "content": 27331700.28547191, + "timestamp": "2025-09-10 02:46:27.783394", + "step": 3969, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:46:27.842785", + "step": 3969, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005022897967137396, + "timestamp": "2025-09-10 02:46:27.853452", + "step": 3970, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:46:27.922791", + "step": 3970, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0065833283588290215, + "timestamp": "2025-09-10 02:46:27.935283", + "step": 3971, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:27.989471", + "step": 3971, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01841031201183796, + "timestamp": "2025-09-10 02:46:27.995776", + "step": 3972, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:28.048495", + "step": 3972, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009554126299917698, + "timestamp": "2025-09-10 02:46:28.055010", + "step": 3973, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:28.109030", + "step": 3973, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001987367169931531, + "timestamp": "2025-09-10 02:46:28.111240", + "step": 3974, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:28.164887", + "step": 3974, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00036607650690712035, + "timestamp": "2025-09-10 02:46:28.167257", + "step": 3975, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:28.221557", + "step": 3975, + "epoch": 2 + }, + { + "type": "loss", + "content": 5.28004347870592e-05, + "timestamp": "2025-09-10 02:46:28.231965", + "step": 3976, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:28.285192", + "step": 3976, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037857815623283386, + "timestamp": "2025-09-10 02:46:28.287599", + "step": 3977, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:28.340544", + "step": 3977, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025487679988145828, + "timestamp": "2025-09-10 02:46:28.343551", + "step": 3978, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:28.398428", + "step": 3978, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012377269566059113, + "timestamp": "2025-09-10 02:46:28.408282", + "step": 3979, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:28.461600", + "step": 3979, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01761629618704319, + "timestamp": "2025-09-10 02:46:28.467446", + "step": 3980, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:28.519647", + "step": 3980, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007856716401875019, + "timestamp": "2025-09-10 02:46:28.522670", + "step": 3981, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:28.575545", + "step": 3981, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006854567211121321, + "timestamp": "2025-09-10 02:46:28.577654", + "step": 3982, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:28.631410", + "step": 3982, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016330325976014137, + "timestamp": "2025-09-10 02:46:28.641058", + "step": 3983, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:28.694502", + "step": 3983, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017446457641199231, + "timestamp": "2025-09-10 02:46:28.700113", + "step": 3984, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:28.752353", + "step": 3984, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015398569405078888, + "timestamp": "2025-09-10 02:46:28.755123", + "step": 3985, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:28.808978", + "step": 3985, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000944934319704771, + "timestamp": "2025-09-10 02:46:28.811453", + "step": 3986, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:28.865486", + "step": 3986, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016267871484160423, + "timestamp": "2025-09-10 02:46:28.871815", + "step": 3987, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:28.925285", + "step": 3987, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010377265280112624, + "timestamp": "2025-09-10 02:46:28.930982", + "step": 3988, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:28.983954", + "step": 3988, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006066413130611181, + "timestamp": "2025-09-10 02:46:28.990446", + "step": 3989, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:29.043772", + "step": 3989, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02377425506711006, + "timestamp": "2025-09-10 02:46:29.046013", + "step": 3990, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:29.098777", + "step": 3990, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004776769317686558, + "timestamp": "2025-09-10 02:46:29.105285", + "step": 3991, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:29.158719", + "step": 3991, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007428622338920832, + "timestamp": "2025-09-10 02:46:29.164756", + "step": 3992, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:29.216945", + "step": 3992, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014080842956900597, + "timestamp": "2025-09-10 02:46:29.219189", + "step": 3993, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:29.272226", + "step": 3993, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012848807964473963, + "timestamp": "2025-09-10 02:46:29.275274", + "step": 3994, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:29.328492", + "step": 3994, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003719982458278537, + "timestamp": "2025-09-10 02:46:29.330824", + "step": 3995, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:29.383977", + "step": 3995, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02686316706240177, + "timestamp": "2025-09-10 02:46:29.389726", + "step": 3996, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:29.442493", + "step": 3996, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009413144434802234, + "timestamp": "2025-09-10 02:46:29.445385", + "step": 3997, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:29.498492", + "step": 3997, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004147673025727272, + "timestamp": "2025-09-10 02:46:29.505188", + "step": 3998, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:29.558431", + "step": 3998, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000486402481328696, + "timestamp": "2025-09-10 02:46:29.560786", + "step": 3999, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:29.614659", + "step": 3999, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014573262305930257, + "timestamp": "2025-09-10 02:46:29.625056", + "step": 4000, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 4000", + "timestamp": "2025-09-10 02:46:30.127771", + "step": 4000, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:30.185801", + "step": 4000, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013435414293780923, + "timestamp": "2025-09-10 02:46:30.188086", + "step": 4001, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:30.242576", + "step": 4001, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003639021422713995, + "timestamp": "2025-09-10 02:46:30.244928", + "step": 4002, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:30.298024", + "step": 4002, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00489605451002717, + "timestamp": "2025-09-10 02:46:30.300191", + "step": 4003, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:30.353499", + "step": 4003, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009414904634468257, + "timestamp": "2025-09-10 02:46:30.359542", + "step": 4004, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:46:30.418567", + "step": 4004, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004596610087901354, + "timestamp": "2025-09-10 02:46:30.430127", + "step": 4005, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:30.483390", + "step": 4005, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0251252893358469, + "timestamp": "2025-09-10 02:46:30.485432", + "step": 4006, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:30.538287", + "step": 4006, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00520761264488101, + "timestamp": "2025-09-10 02:46:30.541212", + "step": 4007, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:30.594180", + "step": 4007, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005466113798320293, + "timestamp": "2025-09-10 02:46:30.600005", + "step": 4008, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:30.652682", + "step": 4008, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00451264763250947, + "timestamp": "2025-09-10 02:46:30.654938", + "step": 4009, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:46:30.721196", + "step": 4009, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01480143517255783, + "timestamp": "2025-09-10 02:46:30.733383", + "step": 4010, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:30.787204", + "step": 4010, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025095022283494473, + "timestamp": "2025-09-10 02:46:30.789308", + "step": 4011, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:30.843520", + "step": 4011, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0075421747751533985, + "timestamp": "2025-09-10 02:46:30.849312", + "step": 4012, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:30.902021", + "step": 4012, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03990047797560692, + "timestamp": "2025-09-10 02:46:30.904301", + "step": 4013, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:46:30.970850", + "step": 4013, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004559301305562258, + "timestamp": "2025-09-10 02:46:30.983058", + "step": 4014, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:46:31.055905", + "step": 4014, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011345594190061092, + "timestamp": "2025-09-10 02:46:31.069336", + "step": 4015, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:31.122943", + "step": 4015, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005087972152978182, + "timestamp": "2025-09-10 02:46:31.128597", + "step": 4016, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:31.180958", + "step": 4016, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014413511380553246, + "timestamp": "2025-09-10 02:46:31.182951", + "step": 4017, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:31.237881", + "step": 4017, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00031759761623106897, + "timestamp": "2025-09-10 02:46:31.247690", + "step": 4018, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:31.303449", + "step": 4018, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011032961308956146, + "timestamp": "2025-09-10 02:46:31.313206", + "step": 4019, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 9920060287936.0 + }, + "timestamp": "2025-09-10 02:46:31.388472", + "step": 4019, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04681537672877312, + "timestamp": "2025-09-10 02:46:31.403193", + "step": 4020, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:31.456314", + "step": 4020, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014196854317560792, + "timestamp": "2025-09-10 02:46:31.466774", + "step": 4021, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:31.520003", + "step": 4021, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007935740984976292, + "timestamp": "2025-09-10 02:46:31.522087", + "step": 4022, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:31.574996", + "step": 4022, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009013201110064983, + "timestamp": "2025-09-10 02:46:31.576865", + "step": 4023, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:31.629780", + "step": 4023, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006031523924320936, + "timestamp": "2025-09-10 02:46:31.635567", + "step": 4024, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:31.688163", + "step": 4024, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005195514764636755, + "timestamp": "2025-09-10 02:46:31.698289", + "step": 4025, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:31.752387", + "step": 4025, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014662462286651134, + "timestamp": "2025-09-10 02:46:31.760428", + "step": 4026, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:31.814088", + "step": 4026, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.049105431884527206, + "timestamp": "2025-09-10 02:46:31.816032", + "step": 4027, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:31.869151", + "step": 4027, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004056194331496954, + "timestamp": "2025-09-10 02:46:31.875013", + "step": 4028, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:31.927775", + "step": 4028, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005449273739941418, + "timestamp": "2025-09-10 02:46:31.929950", + "step": 4029, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:31.983142", + "step": 4029, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009754737839102745, + "timestamp": "2025-09-10 02:46:31.985495", + "step": 4030, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:32.038685", + "step": 4030, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011415573535487056, + "timestamp": "2025-09-10 02:46:32.044943", + "step": 4031, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:32.098081", + "step": 4031, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00044507390703074634, + "timestamp": "2025-09-10 02:46:32.103788", + "step": 4032, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:32.156151", + "step": 4032, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.036054935306310654, + "timestamp": "2025-09-10 02:46:32.166179", + "step": 4033, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:32.219260", + "step": 4033, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011584201827645302, + "timestamp": "2025-09-10 02:46:32.221269", + "step": 4034, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:32.273949", + "step": 4034, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009119933820329607, + "timestamp": "2025-09-10 02:46:32.276113", + "step": 4035, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:32.329356", + "step": 4035, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0072877854108810425, + "timestamp": "2025-09-10 02:46:32.338247", + "step": 4036, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:46:32.405901", + "step": 4036, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037533354479819536, + "timestamp": "2025-09-10 02:46:32.419669", + "step": 4037, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:32.473546", + "step": 4037, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003054562257602811, + "timestamp": "2025-09-10 02:46:32.475790", + "step": 4038, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:46:32.538199", + "step": 4038, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02401849813759327, + "timestamp": "2025-09-10 02:46:32.549304", + "step": 4039, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:32.602707", + "step": 4039, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0076024653390049934, + "timestamp": "2025-09-10 02:46:32.608645", + "step": 4040, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:46:32.669052", + "step": 4040, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013744918396696448, + "timestamp": "2025-09-10 02:46:32.680813", + "step": 4041, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:32.734681", + "step": 4041, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007665363140404224, + "timestamp": "2025-09-10 02:46:32.737000", + "step": 4042, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:32.790469", + "step": 4042, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0051115090027451515, + "timestamp": "2025-09-10 02:46:32.792911", + "step": 4043, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:32.846325", + "step": 4043, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018724999972619116, + "timestamp": "2025-09-10 02:46:32.852418", + "step": 4044, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:46:32.908622", + "step": 4044, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021889707073569298, + "timestamp": "2025-09-10 02:46:32.919738", + "step": 4045, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:32.974537", + "step": 4045, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005109102930873632, + "timestamp": "2025-09-10 02:46:32.983778", + "step": 4046, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:33.037981", + "step": 4046, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011025074636563659, + "timestamp": "2025-09-10 02:46:33.043490", + "step": 4047, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:33.097768", + "step": 4047, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007205640431493521, + "timestamp": "2025-09-10 02:46:33.104904", + "step": 4048, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:33.159819", + "step": 4048, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029010960832238197, + "timestamp": "2025-09-10 02:46:33.161833", + "step": 4049, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:33.215100", + "step": 4049, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005890341941267252, + "timestamp": "2025-09-10 02:46:33.217970", + "step": 4050, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:33.270944", + "step": 4050, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03647368401288986, + "timestamp": "2025-09-10 02:46:33.273151", + "step": 4051, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:33.327253", + "step": 4051, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008288768003694713, + "timestamp": "2025-09-10 02:46:33.336119", + "step": 4052, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:33.389070", + "step": 4052, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015556575963273644, + "timestamp": "2025-09-10 02:46:33.395123", + "step": 4053, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:33.449106", + "step": 4053, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02547958306968212, + "timestamp": "2025-09-10 02:46:33.458694", + "step": 4054, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:33.512631", + "step": 4054, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005097005516290665, + "timestamp": "2025-09-10 02:46:33.515307", + "step": 4055, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:46:33.576271", + "step": 4055, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005207826849073172, + "timestamp": "2025-09-10 02:46:33.587777", + "step": 4056, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:46:33.653775", + "step": 4056, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01705167628824711, + "timestamp": "2025-09-10 02:46:33.667429", + "step": 4057, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:33.721327", + "step": 4057, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007770686410367489, + "timestamp": "2025-09-10 02:46:33.723717", + "step": 4058, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:33.778526", + "step": 4058, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009561757557094097, + "timestamp": "2025-09-10 02:46:33.780979", + "step": 4059, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:46:33.839241", + "step": 4059, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01864950731396675, + "timestamp": "2025-09-10 02:46:33.850473", + "step": 4060, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:33.903702", + "step": 4060, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028672043699771166, + "timestamp": "2025-09-10 02:46:33.909722", + "step": 4061, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:33.963254", + "step": 4061, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007765418849885464, + "timestamp": "2025-09-10 02:46:33.969365", + "step": 4062, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:34.022959", + "step": 4062, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009162651374936104, + "timestamp": "2025-09-10 02:46:34.024841", + "step": 4063, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:34.077548", + "step": 4063, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011173618957400322, + "timestamp": "2025-09-10 02:46:34.083350", + "step": 4064, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:34.136649", + "step": 4064, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0040217977948486805, + "timestamp": "2025-09-10 02:46:34.147154", + "step": 4065, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:34.202270", + "step": 4065, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023825322277843952, + "timestamp": "2025-09-10 02:46:34.204341", + "step": 4066, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:34.257349", + "step": 4066, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013289921917021275, + "timestamp": "2025-09-10 02:46:34.263737", + "step": 4067, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:34.317145", + "step": 4067, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04988854378461838, + "timestamp": "2025-09-10 02:46:34.322863", + "step": 4068, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:34.376189", + "step": 4068, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02614821121096611, + "timestamp": "2025-09-10 02:46:34.378449", + "step": 4069, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:46:34.439468", + "step": 4069, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007697419263422489, + "timestamp": "2025-09-10 02:46:34.450143", + "step": 4070, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:34.503939", + "step": 4070, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014285015873610973, + "timestamp": "2025-09-10 02:46:34.506479", + "step": 4071, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:34.559735", + "step": 4071, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029927226714789867, + "timestamp": "2025-09-10 02:46:34.565945", + "step": 4072, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:34.618351", + "step": 4072, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009025800041854382, + "timestamp": "2025-09-10 02:46:34.621296", + "step": 4073, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:34.674181", + "step": 4073, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026111751794815063, + "timestamp": "2025-09-10 02:46:34.675979", + "step": 4074, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:46:34.756219", + "step": 4074, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004143944941461086, + "timestamp": "2025-09-10 02:46:34.771333", + "step": 4075, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:34.824390", + "step": 4075, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007681169547140598, + "timestamp": "2025-09-10 02:46:34.830292", + "step": 4076, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:34.884006", + "step": 4076, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003643867326900363, + "timestamp": "2025-09-10 02:46:34.894520", + "step": 4077, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:46:34.946997", + "step": 4077, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012861545197665691, + "timestamp": "2025-09-10 02:46:34.949107", + "step": 4078, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:35.002434", + "step": 4078, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002178559545427561, + "timestamp": "2025-09-10 02:46:35.010494", + "step": 4079, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:35.064456", + "step": 4079, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010461727157235146, + "timestamp": "2025-09-10 02:46:35.070400", + "step": 4080, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:46:35.130962", + "step": 4080, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002877767663449049, + "timestamp": "2025-09-10 02:46:35.142973", + "step": 4081, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:35.197186", + "step": 4081, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01713237166404724, + "timestamp": "2025-09-10 02:46:35.203355", + "step": 4082, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:35.259784", + "step": 4082, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009582663886249065, + "timestamp": "2025-09-10 02:46:35.261889", + "step": 4083, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:46:35.322784", + "step": 4083, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021030381321907043, + "timestamp": "2025-09-10 02:46:35.334228", + "step": 4084, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:35.389030", + "step": 4084, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.040052540600299835, + "timestamp": "2025-09-10 02:46:35.391771", + "step": 4085, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:35.446022", + "step": 4085, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016354167833924294, + "timestamp": "2025-09-10 02:46:35.448986", + "step": 4086, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:35.504182", + "step": 4086, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000843790709041059, + "timestamp": "2025-09-10 02:46:35.509222", + "step": 4087, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:35.565620", + "step": 4087, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003825886407867074, + "timestamp": "2025-09-10 02:46:35.572559", + "step": 4088, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:35.630489", + "step": 4088, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01318382564932108, + "timestamp": "2025-09-10 02:46:35.633135", + "step": 4089, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:46:35.704178", + "step": 4089, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035097163170576096, + "timestamp": "2025-09-10 02:46:35.716732", + "step": 4090, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:35.776278", + "step": 4090, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01913559064269066, + "timestamp": "2025-09-10 02:46:35.779224", + "step": 4091, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:35.837253", + "step": 4091, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011474421247839928, + "timestamp": "2025-09-10 02:46:35.844165", + "step": 4092, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:35.901004", + "step": 4092, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012699338840320706, + "timestamp": "2025-09-10 02:46:35.904307", + "step": 4093, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:35.965682", + "step": 4093, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006102017126977444, + "timestamp": "2025-09-10 02:46:35.968842", + "step": 4094, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:36.022783", + "step": 4094, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001885882462374866, + "timestamp": "2025-09-10 02:46:36.025315", + "step": 4095, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:46:36.084608", + "step": 4095, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010418168269097805, + "timestamp": "2025-09-10 02:46:36.095778", + "step": 4096, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:36.150720", + "step": 4096, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005161702632904053, + "timestamp": "2025-09-10 02:46:36.153588", + "step": 4097, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:36.207978", + "step": 4097, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002099298406392336, + "timestamp": "2025-09-10 02:46:36.211789", + "step": 4098, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:36.266671", + "step": 4098, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003340943658258766, + "timestamp": "2025-09-10 02:46:36.270009", + "step": 4099, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 12160073886080.0 + }, + "timestamp": "2025-09-10 02:46:36.368248", + "step": 4099, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009731748141348362, + "timestamp": "2025-09-10 02:46:36.386175", + "step": 4100, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:36.446100", + "step": 4100, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001977026928216219, + "timestamp": "2025-09-10 02:46:36.449830", + "step": 4101, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:36.504415", + "step": 4101, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008826405392028391, + "timestamp": "2025-09-10 02:46:36.514050", + "step": 4102, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:36.570686", + "step": 4102, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002490654354915023, + "timestamp": "2025-09-10 02:46:36.574451", + "step": 4103, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:36.630946", + "step": 4103, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013069234555587173, + "timestamp": "2025-09-10 02:46:36.640436", + "step": 4104, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:36.697039", + "step": 4104, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011374709429219365, + "timestamp": "2025-09-10 02:46:36.699273", + "step": 4105, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:36.761621", + "step": 4105, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007636964786797762, + "timestamp": "2025-09-10 02:46:36.764480", + "step": 4106, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:36.819165", + "step": 4106, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004490118473768234, + "timestamp": "2025-09-10 02:46:36.824059", + "step": 4107, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 8960054460160.0 + }, + "timestamp": "2025-09-10 02:46:36.899554", + "step": 4107, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006942715495824814, + "timestamp": "2025-09-10 02:46:36.913229", + "step": 4108, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:46:36.975350", + "step": 4108, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005277864169329405, + "timestamp": "2025-09-10 02:46:36.986954", + "step": 4109, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:37.043889", + "step": 4109, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006958580110222101, + "timestamp": "2025-09-10 02:46:37.049703", + "step": 4110, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:37.106522", + "step": 4110, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008101566694676876, + "timestamp": "2025-09-10 02:46:37.112348", + "step": 4111, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:37.168621", + "step": 4111, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010885442607104778, + "timestamp": "2025-09-10 02:46:37.174558", + "step": 4112, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:37.227355", + "step": 4112, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012058460153639317, + "timestamp": "2025-09-10 02:46:37.232018", + "step": 4113, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:37.291019", + "step": 4113, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016954487655311823, + "timestamp": "2025-09-10 02:46:37.293175", + "step": 4114, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:37.346320", + "step": 4114, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035064031835645437, + "timestamp": "2025-09-10 02:46:37.348902", + "step": 4115, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:37.402777", + "step": 4115, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027604959905147552, + "timestamp": "2025-09-10 02:46:37.408887", + "step": 4116, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:46:54.393244", + "step": 4116, + "epoch": 2 + }, + { + "type": "pplx", + "content": 27383942.289708607, + "timestamp": "2025-09-10 02:46:54.396230", + "step": 4116, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:54.450780", + "step": 4116, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010639320826157928, + "timestamp": "2025-09-10 02:46:54.455730", + "step": 4117, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:54.510405", + "step": 4117, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016920131165534258, + "timestamp": "2025-09-10 02:46:54.512656", + "step": 4118, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:54.566590", + "step": 4118, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001813766430132091, + "timestamp": "2025-09-10 02:46:54.576084", + "step": 4119, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:54.630143", + "step": 4119, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003169593634083867, + "timestamp": "2025-09-10 02:46:54.636314", + "step": 4120, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:54.689269", + "step": 4120, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005361930816434324, + "timestamp": "2025-09-10 02:46:54.695455", + "step": 4121, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:54.749198", + "step": 4121, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002701799152418971, + "timestamp": "2025-09-10 02:46:54.751203", + "step": 4122, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:54.804655", + "step": 4122, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006143040489405394, + "timestamp": "2025-09-10 02:46:54.806835", + "step": 4123, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:54.861923", + "step": 4123, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006249502766877413, + "timestamp": "2025-09-10 02:46:54.869025", + "step": 4124, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:54.921840", + "step": 4124, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010239200200885534, + "timestamp": "2025-09-10 02:46:54.924359", + "step": 4125, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:54.978852", + "step": 4125, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00025490522966720164, + "timestamp": "2025-09-10 02:46:54.980877", + "step": 4126, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:55.034205", + "step": 4126, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008148782653734088, + "timestamp": "2025-09-10 02:46:55.036659", + "step": 4127, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:55.091086", + "step": 4127, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002787953708320856, + "timestamp": "2025-09-10 02:46:55.097150", + "step": 4128, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:55.150449", + "step": 4128, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013069541892036796, + "timestamp": "2025-09-10 02:46:55.152597", + "step": 4129, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:55.206346", + "step": 4129, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030835315119475126, + "timestamp": "2025-09-10 02:46:55.215972", + "step": 4130, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:55.270221", + "step": 4130, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00046228888095356524, + "timestamp": "2025-09-10 02:46:55.272482", + "step": 4131, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:55.325734", + "step": 4131, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010769153013825417, + "timestamp": "2025-09-10 02:46:55.331820", + "step": 4132, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:55.384403", + "step": 4132, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002228983212262392, + "timestamp": "2025-09-10 02:46:55.386409", + "step": 4133, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:46:55.444717", + "step": 4133, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03251693397760391, + "timestamp": "2025-09-10 02:46:55.455145", + "step": 4134, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:55.508651", + "step": 4134, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027009884361177683, + "timestamp": "2025-09-10 02:46:55.516714", + "step": 4135, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:55.570060", + "step": 4135, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023150457069277763, + "timestamp": "2025-09-10 02:46:55.575946", + "step": 4136, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:55.628654", + "step": 4136, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02348208613693714, + "timestamp": "2025-09-10 02:46:55.636839", + "step": 4137, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:46:55.699096", + "step": 4137, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002066678134724498, + "timestamp": "2025-09-10 02:46:55.710167", + "step": 4138, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:55.764694", + "step": 4138, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007858966710045934, + "timestamp": "2025-09-10 02:46:55.772747", + "step": 4139, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:55.826213", + "step": 4139, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014663800597190857, + "timestamp": "2025-09-10 02:46:55.832208", + "step": 4140, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:46:55.896995", + "step": 4140, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012350101955235004, + "timestamp": "2025-09-10 02:46:55.910243", + "step": 4141, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:55.964131", + "step": 4141, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019156964495778084, + "timestamp": "2025-09-10 02:46:55.971703", + "step": 4142, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:56.026385", + "step": 4142, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002492027124390006, + "timestamp": "2025-09-10 02:46:56.028674", + "step": 4143, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:56.081806", + "step": 4143, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00240537291392684, + "timestamp": "2025-09-10 02:46:56.087597", + "step": 4144, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:46:56.140412", + "step": 4144, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010689268819987774, + "timestamp": "2025-09-10 02:46:56.142423", + "step": 4145, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:56.195137", + "step": 4145, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012677262537181377, + "timestamp": "2025-09-10 02:46:56.197306", + "step": 4146, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:56.250204", + "step": 4146, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003994545841123909, + "timestamp": "2025-09-10 02:46:56.252525", + "step": 4147, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:46:56.318783", + "step": 4147, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007907947525382042, + "timestamp": "2025-09-10 02:46:56.331815", + "step": 4148, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:56.385652", + "step": 4148, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008695722557604313, + "timestamp": "2025-09-10 02:46:56.390927", + "step": 4149, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:46:56.452800", + "step": 4149, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016970546916127205, + "timestamp": "2025-09-10 02:46:56.463938", + "step": 4150, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:56.517949", + "step": 4150, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034423437900841236, + "timestamp": "2025-09-10 02:46:56.520381", + "step": 4151, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:56.573888", + "step": 4151, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005895387148484588, + "timestamp": "2025-09-10 02:46:56.579990", + "step": 4152, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:56.632439", + "step": 4152, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022216182202100754, + "timestamp": "2025-09-10 02:46:56.634864", + "step": 4153, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:56.687792", + "step": 4153, + "epoch": 2 + }, + { + "type": "loss", + "content": 8.276205335278064e-05, + "timestamp": "2025-09-10 02:46:56.689940", + "step": 4154, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:56.743660", + "step": 4154, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026618402916938066, + "timestamp": "2025-09-10 02:46:56.745700", + "step": 4155, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:56.799045", + "step": 4155, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005132874008268118, + "timestamp": "2025-09-10 02:46:56.805265", + "step": 4156, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:56.857797", + "step": 4156, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02009979449212551, + "timestamp": "2025-09-10 02:46:56.867665", + "step": 4157, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:46:56.925691", + "step": 4157, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.033425040543079376, + "timestamp": "2025-09-10 02:46:56.936102", + "step": 4158, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:46:56.994657", + "step": 4158, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018782642437145114, + "timestamp": "2025-09-10 02:46:57.005093", + "step": 4159, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:57.058451", + "step": 4159, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024663552176207304, + "timestamp": "2025-09-10 02:46:57.064595", + "step": 4160, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:57.117960", + "step": 4160, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009812185540795326, + "timestamp": "2025-09-10 02:46:57.120250", + "step": 4161, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:57.173909", + "step": 4161, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03139030560851097, + "timestamp": "2025-09-10 02:46:57.175910", + "step": 4162, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:57.228843", + "step": 4162, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00015185572556219995, + "timestamp": "2025-09-10 02:46:57.230951", + "step": 4163, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:46:57.289433", + "step": 4163, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025247910525649786, + "timestamp": "2025-09-10 02:46:57.300663", + "step": 4164, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:57.353790", + "step": 4164, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00016043234791141003, + "timestamp": "2025-09-10 02:46:57.356040", + "step": 4165, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:57.409538", + "step": 4165, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003010234097018838, + "timestamp": "2025-09-10 02:46:57.417607", + "step": 4166, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:57.471450", + "step": 4166, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02376936562359333, + "timestamp": "2025-09-10 02:46:57.477707", + "step": 4167, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:46:57.530885", + "step": 4167, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003847723826766014, + "timestamp": "2025-09-10 02:46:57.536752", + "step": 4168, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:57.589197", + "step": 4168, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03598278760910034, + "timestamp": "2025-09-10 02:46:57.599225", + "step": 4169, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:57.652783", + "step": 4169, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018984022317454219, + "timestamp": "2025-09-10 02:46:57.655038", + "step": 4170, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:57.708572", + "step": 4170, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02804272435605526, + "timestamp": "2025-09-10 02:46:57.714692", + "step": 4171, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:57.769096", + "step": 4171, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00391715532168746, + "timestamp": "2025-09-10 02:46:57.779526", + "step": 4172, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:46:57.832352", + "step": 4172, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007302652578800917, + "timestamp": "2025-09-10 02:46:57.834309", + "step": 4173, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:46:57.894919", + "step": 4173, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010661360807716846, + "timestamp": "2025-09-10 02:46:57.905701", + "step": 4174, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:57.959295", + "step": 4174, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011691556312143803, + "timestamp": "2025-09-10 02:46:57.965609", + "step": 4175, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:58.020089", + "step": 4175, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04713850095868111, + "timestamp": "2025-09-10 02:46:58.026024", + "step": 4176, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:58.078403", + "step": 4176, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016401372849941254, + "timestamp": "2025-09-10 02:46:58.086520", + "step": 4177, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:58.139539", + "step": 4177, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005184911424294114, + "timestamp": "2025-09-10 02:46:58.141570", + "step": 4178, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:46:58.204505", + "step": 4178, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002902874839492142, + "timestamp": "2025-09-10 02:46:58.215648", + "step": 4179, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:58.270556", + "step": 4179, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01087592076510191, + "timestamp": "2025-09-10 02:46:58.276426", + "step": 4180, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:46:58.329790", + "step": 4180, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009179359301924706, + "timestamp": "2025-09-10 02:46:58.340307", + "step": 4181, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:58.394700", + "step": 4181, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00733901048079133, + "timestamp": "2025-09-10 02:46:58.396959", + "step": 4182, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:58.451043", + "step": 4182, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03791997209191322, + "timestamp": "2025-09-10 02:46:58.453943", + "step": 4183, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:58.507245", + "step": 4183, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00893563125282526, + "timestamp": "2025-09-10 02:46:58.513109", + "step": 4184, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:46:58.566286", + "step": 4184, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01717417687177658, + "timestamp": "2025-09-10 02:46:58.568614", + "step": 4185, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:58.621338", + "step": 4185, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010687016882002354, + "timestamp": "2025-09-10 02:46:58.623769", + "step": 4186, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:58.677623", + "step": 4186, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004435994487721473, + "timestamp": "2025-09-10 02:46:58.684014", + "step": 4187, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:58.737901", + "step": 4187, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002909741015173495, + "timestamp": "2025-09-10 02:46:58.743750", + "step": 4188, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:46:58.797016", + "step": 4188, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009057112038135529, + "timestamp": "2025-09-10 02:46:58.799190", + "step": 4189, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:58.852212", + "step": 4189, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025562902446836233, + "timestamp": "2025-09-10 02:46:58.854540", + "step": 4190, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:58.907910", + "step": 4190, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012630063109099865, + "timestamp": "2025-09-10 02:46:58.910692", + "step": 4191, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:58.964011", + "step": 4191, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003869440406560898, + "timestamp": "2025-09-10 02:46:58.970000", + "step": 4192, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:46:59.022327", + "step": 4192, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.08045341074466705, + "timestamp": "2025-09-10 02:46:59.028834", + "step": 4193, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:59.082530", + "step": 4193, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021230129525065422, + "timestamp": "2025-09-10 02:46:59.084972", + "step": 4194, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:59.139230", + "step": 4194, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014136239187791944, + "timestamp": "2025-09-10 02:46:59.148859", + "step": 4195, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:59.202393", + "step": 4195, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02922656573355198, + "timestamp": "2025-09-10 02:46:59.208201", + "step": 4196, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:46:59.260960", + "step": 4196, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003514822747092694, + "timestamp": "2025-09-10 02:46:59.270809", + "step": 4197, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:46:59.324391", + "step": 4197, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013680154457688332, + "timestamp": "2025-09-10 02:46:59.326703", + "step": 4198, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:59.380555", + "step": 4198, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015003107255324721, + "timestamp": "2025-09-10 02:46:59.383025", + "step": 4199, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:59.436074", + "step": 4199, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009432598948478699, + "timestamp": "2025-09-10 02:46:59.442151", + "step": 4200, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:59.494609", + "step": 4200, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01662013866007328, + "timestamp": "2025-09-10 02:46:59.497564", + "step": 4201, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:46:59.549887", + "step": 4201, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0062576900236308575, + "timestamp": "2025-09-10 02:46:59.551929", + "step": 4202, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:46:59.604580", + "step": 4202, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00976634118705988, + "timestamp": "2025-09-10 02:46:59.607532", + "step": 4203, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:46:59.660778", + "step": 4203, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00025636356440372765, + "timestamp": "2025-09-10 02:46:59.669799", + "step": 4204, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:46:59.728914", + "step": 4204, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000732213375158608, + "timestamp": "2025-09-10 02:46:59.740506", + "step": 4205, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:46:59.793887", + "step": 4205, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016269633546471596, + "timestamp": "2025-09-10 02:46:59.796173", + "step": 4206, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:59.849798", + "step": 4206, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022343024611473083, + "timestamp": "2025-09-10 02:46:59.852200", + "step": 4207, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:46:59.905285", + "step": 4207, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001984843984246254, + "timestamp": "2025-09-10 02:46:59.911266", + "step": 4208, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:46:59.977933", + "step": 4208, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005612724460661411, + "timestamp": "2025-09-10 02:46:59.991515", + "step": 4209, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 848 + ], + "flops": 16960103024960.0 + }, + "timestamp": "2025-09-10 02:47:00.114987", + "step": 4209, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010717118857428432, + "timestamp": "2025-09-10 02:47:00.139058", + "step": 4210, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:00.194483", + "step": 4210, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001120222732424736, + "timestamp": "2025-09-10 02:47:00.204296", + "step": 4211, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:00.258072", + "step": 4211, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006723283790051937, + "timestamp": "2025-09-10 02:47:00.264003", + "step": 4212, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:00.319356", + "step": 4212, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008961746469140053, + "timestamp": "2025-09-10 02:47:00.324288", + "step": 4213, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:00.378142", + "step": 4213, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0066358777694404125, + "timestamp": "2025-09-10 02:47:00.384564", + "step": 4214, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:00.438284", + "step": 4214, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009775841608643532, + "timestamp": "2025-09-10 02:47:00.440823", + "step": 4215, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:47:00.500905", + "step": 4215, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016747374087572098, + "timestamp": "2025-09-10 02:47:00.512369", + "step": 4216, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:47:00.572725", + "step": 4216, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008201695047318935, + "timestamp": "2025-09-10 02:47:00.584670", + "step": 4217, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:00.638359", + "step": 4217, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008140222169458866, + "timestamp": "2025-09-10 02:47:00.640589", + "step": 4218, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:47:00.716208", + "step": 4218, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012579244561493397, + "timestamp": "2025-09-10 02:47:00.730272", + "step": 4219, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:47:00.800496", + "step": 4219, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007998248329386115, + "timestamp": "2025-09-10 02:47:00.813997", + "step": 4220, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:00.868888", + "step": 4220, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006177857052534819, + "timestamp": "2025-09-10 02:47:00.871385", + "step": 4221, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:00.926100", + "step": 4221, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011896011419594288, + "timestamp": "2025-09-10 02:47:00.935902", + "step": 4222, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:00.989884", + "step": 4222, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004865396302193403, + "timestamp": "2025-09-10 02:47:00.992633", + "step": 4223, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:01.045579", + "step": 4223, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0060439822264015675, + "timestamp": "2025-09-10 02:47:01.054520", + "step": 4224, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:01.106900", + "step": 4224, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036904923617839813, + "timestamp": "2025-09-10 02:47:01.114935", + "step": 4225, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:01.168558", + "step": 4225, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025875460356473923, + "timestamp": "2025-09-10 02:47:01.170725", + "step": 4226, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:01.229380", + "step": 4226, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035130020696669817, + "timestamp": "2025-09-10 02:47:01.239838", + "step": 4227, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:01.293331", + "step": 4227, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005314098205417395, + "timestamp": "2025-09-10 02:47:01.299402", + "step": 4228, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:01.353494", + "step": 4228, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015061727724969387, + "timestamp": "2025-09-10 02:47:01.358896", + "step": 4229, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:01.413507", + "step": 4229, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005133180413395166, + "timestamp": "2025-09-10 02:47:01.415973", + "step": 4230, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:47:01.469253", + "step": 4230, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004526397679001093, + "timestamp": "2025-09-10 02:47:01.471532", + "step": 4231, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:47:01.537907", + "step": 4231, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0045868405140936375, + "timestamp": "2025-09-10 02:47:01.550884", + "step": 4232, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:01.603546", + "step": 4232, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002852875739336014, + "timestamp": "2025-09-10 02:47:01.605456", + "step": 4233, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:01.659974", + "step": 4233, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010279986076056957, + "timestamp": "2025-09-10 02:47:01.669781", + "step": 4234, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:01.722852", + "step": 4234, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023315981030464172, + "timestamp": "2025-09-10 02:47:01.729435", + "step": 4235, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:01.782635", + "step": 4235, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011400463059544563, + "timestamp": "2025-09-10 02:47:01.788676", + "step": 4236, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:47:01.853734", + "step": 4236, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022969634737819433, + "timestamp": "2025-09-10 02:47:01.866876", + "step": 4237, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:01.922017", + "step": 4237, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013529693707823753, + "timestamp": "2025-09-10 02:47:01.924522", + "step": 4238, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:01.978957", + "step": 4238, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016862807795405388, + "timestamp": "2025-09-10 02:47:01.981286", + "step": 4239, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:47:02.041957", + "step": 4239, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025355503894388676, + "timestamp": "2025-09-10 02:47:02.053446", + "step": 4240, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:02.106953", + "step": 4240, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010484575293958187, + "timestamp": "2025-09-10 02:47:02.109049", + "step": 4241, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:02.162184", + "step": 4241, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005992446094751358, + "timestamp": "2025-09-10 02:47:02.164390", + "step": 4242, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:02.219482", + "step": 4242, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020115068182349205, + "timestamp": "2025-09-10 02:47:02.229278", + "step": 4243, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:02.283157", + "step": 4243, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003577465657144785, + "timestamp": "2025-09-10 02:47:02.289233", + "step": 4244, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:02.342126", + "step": 4244, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003213457064703107, + "timestamp": "2025-09-10 02:47:02.344363", + "step": 4245, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:02.397029", + "step": 4245, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013622616417706013, + "timestamp": "2025-09-10 02:47:02.400079", + "step": 4246, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:02.453550", + "step": 4246, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036872965283691883, + "timestamp": "2025-09-10 02:47:02.455910", + "step": 4247, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:02.509963", + "step": 4247, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002753880573436618, + "timestamp": "2025-09-10 02:47:02.516126", + "step": 4248, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:02.574157", + "step": 4248, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028752887155860662, + "timestamp": "2025-09-10 02:47:02.580153", + "step": 4249, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:02.635160", + "step": 4249, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001357552013359964, + "timestamp": "2025-09-10 02:47:02.637590", + "step": 4250, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 13120079713856.0 + }, + "timestamp": "2025-09-10 02:47:02.734227", + "step": 4250, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006518196314573288, + "timestamp": "2025-09-10 02:47:02.752726", + "step": 4251, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:02.807718", + "step": 4251, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0052061183378100395, + "timestamp": "2025-09-10 02:47:02.815701", + "step": 4252, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:02.869328", + "step": 4252, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0058479164727032185, + "timestamp": "2025-09-10 02:47:02.871672", + "step": 4253, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:47:02.940033", + "step": 4253, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0052193403244018555, + "timestamp": "2025-09-10 02:47:02.952642", + "step": 4254, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:03.006588", + "step": 4254, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011164784664288163, + "timestamp": "2025-09-10 02:47:03.011760", + "step": 4255, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:03.065292", + "step": 4255, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003421362955123186, + "timestamp": "2025-09-10 02:47:03.071359", + "step": 4256, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:03.124053", + "step": 4256, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00365271745249629, + "timestamp": "2025-09-10 02:47:03.134274", + "step": 4257, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:47:03.202031", + "step": 4257, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006833566003479064, + "timestamp": "2025-09-10 02:47:03.212936", + "step": 4258, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:03.266836", + "step": 4258, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012600627960637212, + "timestamp": "2025-09-10 02:47:03.276449", + "step": 4259, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:03.329886", + "step": 4259, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00101366825401783, + "timestamp": "2025-09-10 02:47:03.339043", + "step": 4260, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:47:03.398187", + "step": 4260, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017269121599383652, + "timestamp": "2025-09-10 02:47:03.409705", + "step": 4261, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:03.468023", + "step": 4261, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008999736979603767, + "timestamp": "2025-09-10 02:47:03.470330", + "step": 4262, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:03.527535", + "step": 4262, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004750747699290514, + "timestamp": "2025-09-10 02:47:03.530025", + "step": 4263, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:47:20.555136", + "step": 4263, + "epoch": 2 + }, + { + "type": "pplx", + "content": 25644173.16284924, + "timestamp": "2025-09-10 02:47:20.557910", + "step": 4263, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:20.612045", + "step": 4263, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016500626225024462, + "timestamp": "2025-09-10 02:47:20.621609", + "step": 4264, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:20.674918", + "step": 4264, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004900079686194658, + "timestamp": "2025-09-10 02:47:20.680534", + "step": 4265, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:47:20.733324", + "step": 4265, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019907455425709486, + "timestamp": "2025-09-10 02:47:20.735673", + "step": 4266, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:20.789127", + "step": 4266, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002901068946812302, + "timestamp": "2025-09-10 02:47:20.791404", + "step": 4267, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:20.844339", + "step": 4267, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001220567268319428, + "timestamp": "2025-09-10 02:47:20.851390", + "step": 4268, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:20.908307", + "step": 4268, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025715783704072237, + "timestamp": "2025-09-10 02:47:20.919513", + "step": 4269, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:20.974498", + "step": 4269, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004607465525623411, + "timestamp": "2025-09-10 02:47:20.976877", + "step": 4270, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:21.030611", + "step": 4270, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013449196703732014, + "timestamp": "2025-09-10 02:47:21.032653", + "step": 4271, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:21.085190", + "step": 4271, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003410050878301263, + "timestamp": "2025-09-10 02:47:21.091109", + "step": 4272, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:47:21.149471", + "step": 4272, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0329984612762928, + "timestamp": "2025-09-10 02:47:21.161083", + "step": 4273, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:21.215238", + "step": 4273, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014055393636226654, + "timestamp": "2025-09-10 02:47:21.217820", + "step": 4274, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:21.270713", + "step": 4274, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008442390710115433, + "timestamp": "2025-09-10 02:47:21.272802", + "step": 4275, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:21.325573", + "step": 4275, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004875912272837013, + "timestamp": "2025-09-10 02:47:21.332870", + "step": 4276, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:21.384952", + "step": 4276, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014641110319644213, + "timestamp": "2025-09-10 02:47:21.386827", + "step": 4277, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:21.439352", + "step": 4277, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0051191323436796665, + "timestamp": "2025-09-10 02:47:21.442265", + "step": 4278, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:47:21.503120", + "step": 4278, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016277057584375143, + "timestamp": "2025-09-10 02:47:21.514273", + "step": 4279, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:21.567774", + "step": 4279, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00355696864426136, + "timestamp": "2025-09-10 02:47:21.573852", + "step": 4280, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:21.626882", + "step": 4280, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005710911937057972, + "timestamp": "2025-09-10 02:47:21.629154", + "step": 4281, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:21.683330", + "step": 4281, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006681772647425532, + "timestamp": "2025-09-10 02:47:21.692942", + "step": 4282, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:21.748994", + "step": 4282, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033257752656936646, + "timestamp": "2025-09-10 02:47:21.751506", + "step": 4283, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:21.804798", + "step": 4283, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0073212082497775555, + "timestamp": "2025-09-10 02:47:21.810623", + "step": 4284, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:21.868002", + "step": 4284, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005071139428764582, + "timestamp": "2025-09-10 02:47:21.872237", + "step": 4285, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:21.929796", + "step": 4285, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012945490889251232, + "timestamp": "2025-09-10 02:47:21.939425", + "step": 4286, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:22.006140", + "step": 4286, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021135341376066208, + "timestamp": "2025-09-10 02:47:22.015930", + "step": 4287, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:22.077904", + "step": 4287, + "epoch": 2 + }, + { + "type": "loss", + "content": 9.846382454270497e-05, + "timestamp": "2025-09-10 02:47:22.085323", + "step": 4288, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:22.138448", + "step": 4288, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000921298167668283, + "timestamp": "2025-09-10 02:47:22.146747", + "step": 4289, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:22.205334", + "step": 4289, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003444183384999633, + "timestamp": "2025-09-10 02:47:22.212005", + "step": 4290, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:22.268199", + "step": 4290, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04066583886742592, + "timestamp": "2025-09-10 02:47:22.274795", + "step": 4291, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:22.331016", + "step": 4291, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00016234882059507072, + "timestamp": "2025-09-10 02:47:22.336686", + "step": 4292, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:22.389308", + "step": 4292, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006329437252134085, + "timestamp": "2025-09-10 02:47:22.392343", + "step": 4293, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:22.446697", + "step": 4293, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017065482097677886, + "timestamp": "2025-09-10 02:47:22.456233", + "step": 4294, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:22.511983", + "step": 4294, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008375744218938053, + "timestamp": "2025-09-10 02:47:22.521622", + "step": 4295, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:22.576141", + "step": 4295, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009327185340225697, + "timestamp": "2025-09-10 02:47:22.596244", + "step": 4296, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:22.649963", + "step": 4296, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028118512127548456, + "timestamp": "2025-09-10 02:47:22.652330", + "step": 4297, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:22.705789", + "step": 4297, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00021826619922649115, + "timestamp": "2025-09-10 02:47:22.708367", + "step": 4298, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:22.762756", + "step": 4298, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02473120577633381, + "timestamp": "2025-09-10 02:47:22.765033", + "step": 4299, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:22.824101", + "step": 4299, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010418480262160301, + "timestamp": "2025-09-10 02:47:22.830332", + "step": 4300, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:22.885674", + "step": 4300, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022602102253586054, + "timestamp": "2025-09-10 02:47:22.892201", + "step": 4301, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:22.947884", + "step": 4301, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003526568762026727, + "timestamp": "2025-09-10 02:47:22.950735", + "step": 4302, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:23.004783", + "step": 4302, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012839640257880092, + "timestamp": "2025-09-10 02:47:23.011225", + "step": 4303, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:23.069550", + "step": 4303, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00187406106851995, + "timestamp": "2025-09-10 02:47:23.079918", + "step": 4304, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:23.133427", + "step": 4304, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009665919351391494, + "timestamp": "2025-09-10 02:47:23.138522", + "step": 4305, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:23.201381", + "step": 4305, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00036055262899026275, + "timestamp": "2025-09-10 02:47:23.203984", + "step": 4306, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:23.257178", + "step": 4306, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004071732517331839, + "timestamp": "2025-09-10 02:47:23.272067", + "step": 4307, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:23.335915", + "step": 4307, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008712194859981537, + "timestamp": "2025-09-10 02:47:23.343969", + "step": 4308, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:23.398148", + "step": 4308, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001939832349307835, + "timestamp": "2025-09-10 02:47:23.402555", + "step": 4309, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:23.460393", + "step": 4309, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015591054689139128, + "timestamp": "2025-09-10 02:47:23.467337", + "step": 4310, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:23.521567", + "step": 4310, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003225362452212721, + "timestamp": "2025-09-10 02:47:23.524894", + "step": 4311, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:23.580010", + "step": 4311, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00046219161595217884, + "timestamp": "2025-09-10 02:47:23.587617", + "step": 4312, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:23.639698", + "step": 4312, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006220670184120536, + "timestamp": "2025-09-10 02:47:23.642543", + "step": 4313, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:47:23.703670", + "step": 4313, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008913377299904823, + "timestamp": "2025-09-10 02:47:23.714617", + "step": 4314, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:23.772255", + "step": 4314, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005080437753349543, + "timestamp": "2025-09-10 02:47:23.782675", + "step": 4315, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:23.835501", + "step": 4315, + "epoch": 2 + }, + { + "type": "loss", + "content": 9.659978240961209e-05, + "timestamp": "2025-09-10 02:47:23.841426", + "step": 4316, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:23.894008", + "step": 4316, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001103831920772791, + "timestamp": "2025-09-10 02:47:23.904154", + "step": 4317, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:23.957041", + "step": 4317, + "epoch": 2 + }, + { + "type": "loss", + "content": 9.482850873610005e-05, + "timestamp": "2025-09-10 02:47:23.959203", + "step": 4318, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:24.011631", + "step": 4318, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010703227017074823, + "timestamp": "2025-09-10 02:47:24.014662", + "step": 4319, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:24.067439", + "step": 4319, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008624103502370417, + "timestamp": "2025-09-10 02:47:24.073101", + "step": 4320, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:24.125053", + "step": 4320, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016158433863893151, + "timestamp": "2025-09-10 02:47:24.131764", + "step": 4321, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:24.184882", + "step": 4321, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019937059842050076, + "timestamp": "2025-09-10 02:47:24.187111", + "step": 4322, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:24.239897", + "step": 4322, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021892760414630175, + "timestamp": "2025-09-10 02:47:24.242117", + "step": 4323, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:24.296569", + "step": 4323, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002153662935597822, + "timestamp": "2025-09-10 02:47:24.307147", + "step": 4324, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:24.359664", + "step": 4324, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00015940495359245688, + "timestamp": "2025-09-10 02:47:24.361676", + "step": 4325, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:24.414472", + "step": 4325, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004014693666249514, + "timestamp": "2025-09-10 02:47:24.416673", + "step": 4326, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:24.470093", + "step": 4326, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009683924727141857, + "timestamp": "2025-09-10 02:47:24.472239", + "step": 4327, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:24.525324", + "step": 4327, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016802401514723897, + "timestamp": "2025-09-10 02:47:24.531298", + "step": 4328, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:24.583683", + "step": 4328, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007548587862402201, + "timestamp": "2025-09-10 02:47:24.593898", + "step": 4329, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:47:24.647445", + "step": 4329, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004307155904825777, + "timestamp": "2025-09-10 02:47:24.649541", + "step": 4330, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:47:24.711439", + "step": 4330, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032655613031238317, + "timestamp": "2025-09-10 02:47:24.722600", + "step": 4331, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:24.777002", + "step": 4331, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02425667643547058, + "timestamp": "2025-09-10 02:47:24.783149", + "step": 4332, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:24.835310", + "step": 4332, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002336332807317376, + "timestamp": "2025-09-10 02:47:24.838283", + "step": 4333, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:47:24.907017", + "step": 4333, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016943739028647542, + "timestamp": "2025-09-10 02:47:24.919743", + "step": 4334, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:24.972647", + "step": 4334, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05163237825036049, + "timestamp": "2025-09-10 02:47:24.975635", + "step": 4335, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:25.028542", + "step": 4335, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012770164175890386, + "timestamp": "2025-09-10 02:47:25.035960", + "step": 4336, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:25.087963", + "step": 4336, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023083502892404795, + "timestamp": "2025-09-10 02:47:25.091059", + "step": 4337, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:25.144155", + "step": 4337, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.056472379714250565, + "timestamp": "2025-09-10 02:47:25.146165", + "step": 4338, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:25.198933", + "step": 4338, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00045358770876191556, + "timestamp": "2025-09-10 02:47:25.200947", + "step": 4339, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 8960054460160.0 + }, + "timestamp": "2025-09-10 02:47:25.271774", + "step": 4339, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016407015500590205, + "timestamp": "2025-09-10 02:47:25.285370", + "step": 4340, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:25.338801", + "step": 4340, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02011013962328434, + "timestamp": "2025-09-10 02:47:25.341070", + "step": 4341, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:25.394716", + "step": 4341, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004920617211610079, + "timestamp": "2025-09-10 02:47:25.397148", + "step": 4342, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:25.450058", + "step": 4342, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01722615212202072, + "timestamp": "2025-09-10 02:47:25.452234", + "step": 4343, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:25.505444", + "step": 4343, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028729308396577835, + "timestamp": "2025-09-10 02:47:25.511093", + "step": 4344, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:25.564881", + "step": 4344, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00216209446080029, + "timestamp": "2025-09-10 02:47:25.566984", + "step": 4345, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:25.619792", + "step": 4345, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00043724634451791644, + "timestamp": "2025-09-10 02:47:25.622083", + "step": 4346, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:25.675573", + "step": 4346, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02942570671439171, + "timestamp": "2025-09-10 02:47:25.682206", + "step": 4347, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:25.735464", + "step": 4347, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001083740935428068, + "timestamp": "2025-09-10 02:47:25.741207", + "step": 4348, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:25.794934", + "step": 4348, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001994392601773143, + "timestamp": "2025-09-10 02:47:25.805431", + "step": 4349, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:25.858525", + "step": 4349, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007920349016785622, + "timestamp": "2025-09-10 02:47:25.860944", + "step": 4350, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:25.914208", + "step": 4350, + "epoch": 2 + }, + { + "type": "loss", + "content": 9.946012869477272e-05, + "timestamp": "2025-09-10 02:47:25.916488", + "step": 4351, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:25.970165", + "step": 4351, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00013539181964006275, + "timestamp": "2025-09-10 02:47:25.976140", + "step": 4352, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:26.028887", + "step": 4352, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017476447392255068, + "timestamp": "2025-09-10 02:47:26.031425", + "step": 4353, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:26.084105", + "step": 4353, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012519230949692428, + "timestamp": "2025-09-10 02:47:26.087412", + "step": 4354, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:26.141005", + "step": 4354, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00020083349954802543, + "timestamp": "2025-09-10 02:47:26.147558", + "step": 4355, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:26.200371", + "step": 4355, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002472948399372399, + "timestamp": "2025-09-10 02:47:26.206234", + "step": 4356, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:26.259881", + "step": 4356, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002402596641331911, + "timestamp": "2025-09-10 02:47:26.270159", + "step": 4357, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:26.325044", + "step": 4357, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002291470766067505, + "timestamp": "2025-09-10 02:47:26.334835", + "step": 4358, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:26.388028", + "step": 4358, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004171359760221094, + "timestamp": "2025-09-10 02:47:26.390157", + "step": 4359, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:26.443648", + "step": 4359, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000416089576901868, + "timestamp": "2025-09-10 02:47:26.449400", + "step": 4360, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:26.501770", + "step": 4360, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012556105852127075, + "timestamp": "2025-09-10 02:47:26.504180", + "step": 4361, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:26.558772", + "step": 4361, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003699475491885096, + "timestamp": "2025-09-10 02:47:26.568567", + "step": 4362, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:26.621835", + "step": 4362, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029870334547013044, + "timestamp": "2025-09-10 02:47:26.624027", + "step": 4363, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:26.677227", + "step": 4363, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008871073834598064, + "timestamp": "2025-09-10 02:47:26.682959", + "step": 4364, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:26.735316", + "step": 4364, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008695862488821149, + "timestamp": "2025-09-10 02:47:26.743558", + "step": 4365, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:26.796770", + "step": 4365, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004937638994306326, + "timestamp": "2025-09-10 02:47:26.803292", + "step": 4366, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:26.856147", + "step": 4366, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00038243673043325543, + "timestamp": "2025-09-10 02:47:26.859179", + "step": 4367, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:47:26.912932", + "step": 4367, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00197978294454515, + "timestamp": "2025-09-10 02:47:26.918699", + "step": 4368, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:26.971602", + "step": 4368, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035370823461562395, + "timestamp": "2025-09-10 02:47:26.974181", + "step": 4369, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:27.027660", + "step": 4369, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02326894737780094, + "timestamp": "2025-09-10 02:47:27.030161", + "step": 4370, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:27.083290", + "step": 4370, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.08179046213626862, + "timestamp": "2025-09-10 02:47:27.085755", + "step": 4371, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:27.138643", + "step": 4371, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00025862836628220975, + "timestamp": "2025-09-10 02:47:27.144786", + "step": 4372, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:27.197129", + "step": 4372, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0045958468690514565, + "timestamp": "2025-09-10 02:47:27.200182", + "step": 4373, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:27.253033", + "step": 4373, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011955601163208485, + "timestamp": "2025-09-10 02:47:27.255362", + "step": 4374, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:27.309125", + "step": 4374, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000381296529667452, + "timestamp": "2025-09-10 02:47:27.312199", + "step": 4375, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:27.365417", + "step": 4375, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005038236267864704, + "timestamp": "2025-09-10 02:47:27.371676", + "step": 4376, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:27.424881", + "step": 4376, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005474306643009186, + "timestamp": "2025-09-10 02:47:27.427818", + "step": 4377, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:27.481370", + "step": 4377, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0186654981225729, + "timestamp": "2025-09-10 02:47:27.489443", + "step": 4378, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:47:27.550403", + "step": 4378, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01948568783700466, + "timestamp": "2025-09-10 02:47:27.561184", + "step": 4379, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:47:27.622433", + "step": 4379, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02189786732196808, + "timestamp": "2025-09-10 02:47:27.633944", + "step": 4380, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:27.687133", + "step": 4380, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000145500831422396, + "timestamp": "2025-09-10 02:47:27.689556", + "step": 4381, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:47:27.750020", + "step": 4381, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005379405338317156, + "timestamp": "2025-09-10 02:47:27.760798", + "step": 4382, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:27.814708", + "step": 4382, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008759471820667386, + "timestamp": "2025-09-10 02:47:27.817936", + "step": 4383, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:27.872522", + "step": 4383, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036357541102916002, + "timestamp": "2025-09-10 02:47:27.878528", + "step": 4384, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:27.931721", + "step": 4384, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011262218467891216, + "timestamp": "2025-09-10 02:47:27.934234", + "step": 4385, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:27.987688", + "step": 4385, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00036622449988499284, + "timestamp": "2025-09-10 02:47:27.990213", + "step": 4386, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:28.043575", + "step": 4386, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03118307515978813, + "timestamp": "2025-09-10 02:47:28.045842", + "step": 4387, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:28.098880", + "step": 4387, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021011371165513992, + "timestamp": "2025-09-10 02:47:28.104789", + "step": 4388, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:28.161951", + "step": 4388, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016629895195364952, + "timestamp": "2025-09-10 02:47:28.173193", + "step": 4389, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:28.226099", + "step": 4389, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0039963508024811745, + "timestamp": "2025-09-10 02:47:28.229293", + "step": 4390, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:28.284261", + "step": 4390, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005318841896951199, + "timestamp": "2025-09-10 02:47:28.294078", + "step": 4391, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:28.347708", + "step": 4391, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018943492323160172, + "timestamp": "2025-09-10 02:47:28.353552", + "step": 4392, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:47:28.432124", + "step": 4392, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002766832709312439, + "timestamp": "2025-09-10 02:47:28.448630", + "step": 4393, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:28.502893", + "step": 4393, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033191312104463577, + "timestamp": "2025-09-10 02:47:28.505114", + "step": 4394, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:28.564329", + "step": 4394, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00220515881665051, + "timestamp": "2025-09-10 02:47:28.574763", + "step": 4395, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:28.628308", + "step": 4395, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002763511147350073, + "timestamp": "2025-09-10 02:47:28.634534", + "step": 4396, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:28.688040", + "step": 4396, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04486337676644325, + "timestamp": "2025-09-10 02:47:28.690279", + "step": 4397, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:28.744037", + "step": 4397, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006099449936300516, + "timestamp": "2025-09-10 02:47:28.746327", + "step": 4398, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:47:28.807899", + "step": 4398, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005696186330169439, + "timestamp": "2025-09-10 02:47:28.818823", + "step": 4399, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:28.872588", + "step": 4399, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006501591997221112, + "timestamp": "2025-09-10 02:47:28.878378", + "step": 4400, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:47:28.944775", + "step": 4400, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001073510036803782, + "timestamp": "2025-09-10 02:47:28.958395", + "step": 4401, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:29.017340", + "step": 4401, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008090257761068642, + "timestamp": "2025-09-10 02:47:29.027786", + "step": 4402, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:29.081620", + "step": 4402, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020783047657459974, + "timestamp": "2025-09-10 02:47:29.083927", + "step": 4403, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:29.136892", + "step": 4403, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015986433252692223, + "timestamp": "2025-09-10 02:47:29.142730", + "step": 4404, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:29.195275", + "step": 4404, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004745179321616888, + "timestamp": "2025-09-10 02:47:29.202061", + "step": 4405, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:29.255072", + "step": 4405, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014310779049992561, + "timestamp": "2025-09-10 02:47:29.257431", + "step": 4406, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:29.310227", + "step": 4406, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026371285784989595, + "timestamp": "2025-09-10 02:47:29.312677", + "step": 4407, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:29.371048", + "step": 4407, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04145417362451553, + "timestamp": "2025-09-10 02:47:29.382259", + "step": 4408, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:29.435075", + "step": 4408, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006641158368438482, + "timestamp": "2025-09-10 02:47:29.437470", + "step": 4409, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:29.490578", + "step": 4409, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005713313585147262, + "timestamp": "2025-09-10 02:47:29.492923", + "step": 4410, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:47:46.372443", + "step": 4410, + "epoch": 2 + }, + { + "type": "pplx", + "content": 26986518.333128188, + "timestamp": "2025-09-10 02:47:46.375775", + "step": 4410, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:46.431427", + "step": 4410, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00368632678873837, + "timestamp": "2025-09-10 02:47:46.436209", + "step": 4411, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:46.490376", + "step": 4411, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013429404934868217, + "timestamp": "2025-09-10 02:47:46.496991", + "step": 4412, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:46.552492", + "step": 4412, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0072911703027784824, + "timestamp": "2025-09-10 02:47:46.554516", + "step": 4413, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:46.609303", + "step": 4413, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013402380980551243, + "timestamp": "2025-09-10 02:47:46.618086", + "step": 4414, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:46.672426", + "step": 4414, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004903607070446014, + "timestamp": "2025-09-10 02:47:46.678212", + "step": 4415, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:46.731758", + "step": 4415, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009532332420349121, + "timestamp": "2025-09-10 02:47:46.738674", + "step": 4416, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:46.796297", + "step": 4416, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034129994455724955, + "timestamp": "2025-09-10 02:47:46.807498", + "step": 4417, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:46.862238", + "step": 4417, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019939128309488297, + "timestamp": "2025-09-10 02:47:46.871800", + "step": 4418, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:46.926254", + "step": 4418, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007876886054873466, + "timestamp": "2025-09-10 02:47:46.928562", + "step": 4419, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:46.981815", + "step": 4419, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020741092041134834, + "timestamp": "2025-09-10 02:47:46.988171", + "step": 4420, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:47.041047", + "step": 4420, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008140448480844498, + "timestamp": "2025-09-10 02:47:47.043534", + "step": 4421, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:47.096634", + "step": 4421, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026586491148918867, + "timestamp": "2025-09-10 02:47:47.101033", + "step": 4422, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:47.160595", + "step": 4422, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011719962349161506, + "timestamp": "2025-09-10 02:47:47.162787", + "step": 4423, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:47.216280", + "step": 4423, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025557424873113632, + "timestamp": "2025-09-10 02:47:47.226716", + "step": 4424, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:47.279396", + "step": 4424, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019124869722872972, + "timestamp": "2025-09-10 02:47:47.289294", + "step": 4425, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:47.343599", + "step": 4425, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00876543577760458, + "timestamp": "2025-09-10 02:47:47.345977", + "step": 4426, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:47.399377", + "step": 4426, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014942241832613945, + "timestamp": "2025-09-10 02:47:47.407427", + "step": 4427, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:47.460420", + "step": 4427, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005349894636310637, + "timestamp": "2025-09-10 02:47:47.466715", + "step": 4428, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:47.519601", + "step": 4428, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004284702241420746, + "timestamp": "2025-09-10 02:47:47.521834", + "step": 4429, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:47.575073", + "step": 4429, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027856361120939255, + "timestamp": "2025-09-10 02:47:47.577462", + "step": 4430, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:47.634791", + "step": 4430, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001220623031258583, + "timestamp": "2025-09-10 02:47:47.641113", + "step": 4431, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:47:47.702690", + "step": 4431, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017930852249264717, + "timestamp": "2025-09-10 02:47:47.714150", + "step": 4432, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:47:47.767014", + "step": 4432, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00795585848391056, + "timestamp": "2025-09-10 02:47:47.769221", + "step": 4433, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:47:47.822118", + "step": 4433, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000394886068534106, + "timestamp": "2025-09-10 02:47:47.824665", + "step": 4434, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:47.878050", + "step": 4434, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001962206093594432, + "timestamp": "2025-09-10 02:47:47.880975", + "step": 4435, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:47.934274", + "step": 4435, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004236937500536442, + "timestamp": "2025-09-10 02:47:47.940325", + "step": 4436, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:47:48.014896", + "step": 4436, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005876132054254413, + "timestamp": "2025-09-10 02:47:48.030310", + "step": 4437, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:48.084293", + "step": 4437, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005827220622450113, + "timestamp": "2025-09-10 02:47:48.087091", + "step": 4438, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:48.140322", + "step": 4438, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026568984612822533, + "timestamp": "2025-09-10 02:47:48.142936", + "step": 4439, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:48.196893", + "step": 4439, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014371698722243309, + "timestamp": "2025-09-10 02:47:48.207233", + "step": 4440, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:48.260269", + "step": 4440, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005732525605708361, + "timestamp": "2025-09-10 02:47:48.262402", + "step": 4441, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:47:48.328900", + "step": 4441, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007794953417032957, + "timestamp": "2025-09-10 02:47:48.341135", + "step": 4442, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:48.394938", + "step": 4442, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007660404080525041, + "timestamp": "2025-09-10 02:47:48.398067", + "step": 4443, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:48.451564", + "step": 4443, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017210535006597638, + "timestamp": "2025-09-10 02:47:48.457591", + "step": 4444, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:47:48.517957", + "step": 4444, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009950220119208097, + "timestamp": "2025-09-10 02:47:48.529948", + "step": 4445, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:48.583845", + "step": 4445, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04135027155280113, + "timestamp": "2025-09-10 02:47:48.586068", + "step": 4446, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:48.640996", + "step": 4446, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027567947283387184, + "timestamp": "2025-09-10 02:47:48.650796", + "step": 4447, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:47:48.717830", + "step": 4447, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012885911855846643, + "timestamp": "2025-09-10 02:47:48.730870", + "step": 4448, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:48.783238", + "step": 4448, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004028314258903265, + "timestamp": "2025-09-10 02:47:48.785386", + "step": 4449, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:47:48.852207", + "step": 4449, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03624418377876282, + "timestamp": "2025-09-10 02:47:48.864420", + "step": 4450, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:48.917895", + "step": 4450, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01659783348441124, + "timestamp": "2025-09-10 02:47:48.920762", + "step": 4451, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:48.974183", + "step": 4451, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03107641078531742, + "timestamp": "2025-09-10 02:47:48.982814", + "step": 4452, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:49.035361", + "step": 4452, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007877948693931103, + "timestamp": "2025-09-10 02:47:49.037827", + "step": 4453, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:49.091001", + "step": 4453, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0068859620951116085, + "timestamp": "2025-09-10 02:47:49.097542", + "step": 4454, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:49.150756", + "step": 4454, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001690584933385253, + "timestamp": "2025-09-10 02:47:49.153114", + "step": 4455, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:49.206808", + "step": 4455, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005641180323436856, + "timestamp": "2025-09-10 02:47:49.217219", + "step": 4456, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:49.270188", + "step": 4456, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004897342063486576, + "timestamp": "2025-09-10 02:47:49.278386", + "step": 4457, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:49.331149", + "step": 4457, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002342212712392211, + "timestamp": "2025-09-10 02:47:49.333383", + "step": 4458, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:49.386149", + "step": 4458, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001183982822112739, + "timestamp": "2025-09-10 02:47:49.388411", + "step": 4459, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:49.442338", + "step": 4459, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009718267247080803, + "timestamp": "2025-09-10 02:47:49.448215", + "step": 4460, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:49.500639", + "step": 4460, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00479681883007288, + "timestamp": "2025-09-10 02:47:49.508892", + "step": 4461, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:49.567338", + "step": 4461, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010085642337799072, + "timestamp": "2025-09-10 02:47:49.577781", + "step": 4462, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:49.631572", + "step": 4462, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04318024963140488, + "timestamp": "2025-09-10 02:47:49.633912", + "step": 4463, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:49.692634", + "step": 4463, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014200977981090546, + "timestamp": "2025-09-10 02:47:49.703836", + "step": 4464, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:49.756876", + "step": 4464, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004874747712165117, + "timestamp": "2025-09-10 02:47:49.759193", + "step": 4465, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:49.812307", + "step": 4465, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000795876607298851, + "timestamp": "2025-09-10 02:47:49.818982", + "step": 4466, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:49.872125", + "step": 4466, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014039665693417192, + "timestamp": "2025-09-10 02:47:49.880235", + "step": 4467, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:49.938844", + "step": 4467, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004372471943497658, + "timestamp": "2025-09-10 02:47:49.950031", + "step": 4468, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:50.002702", + "step": 4468, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004679134581238031, + "timestamp": "2025-09-10 02:47:50.008993", + "step": 4469, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:50.062183", + "step": 4469, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003524158149957657, + "timestamp": "2025-09-10 02:47:50.065244", + "step": 4470, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:50.118988", + "step": 4470, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0075031062588095665, + "timestamp": "2025-09-10 02:47:50.121151", + "step": 4471, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:50.174180", + "step": 4471, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011620745994150639, + "timestamp": "2025-09-10 02:47:50.180186", + "step": 4472, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:50.232779", + "step": 4472, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010053029283881187, + "timestamp": "2025-09-10 02:47:50.235072", + "step": 4473, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:50.288500", + "step": 4473, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026370359119027853, + "timestamp": "2025-09-10 02:47:50.290865", + "step": 4474, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:50.343895", + "step": 4474, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014432991854846478, + "timestamp": "2025-09-10 02:47:50.345948", + "step": 4475, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:47:50.398608", + "step": 4475, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011551310308277607, + "timestamp": "2025-09-10 02:47:50.404498", + "step": 4476, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:50.457243", + "step": 4476, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017103832215070724, + "timestamp": "2025-09-10 02:47:50.467319", + "step": 4477, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:50.520885", + "step": 4477, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005751727614551783, + "timestamp": "2025-09-10 02:47:50.523316", + "step": 4478, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:50.577079", + "step": 4478, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008228392340242863, + "timestamp": "2025-09-10 02:47:50.579243", + "step": 4479, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:50.632165", + "step": 4479, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019313262309879065, + "timestamp": "2025-09-10 02:47:50.639600", + "step": 4480, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:50.692395", + "step": 4480, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002656809287145734, + "timestamp": "2025-09-10 02:47:50.694557", + "step": 4481, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:50.747605", + "step": 4481, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05354071408510208, + "timestamp": "2025-09-10 02:47:50.755806", + "step": 4482, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:50.809249", + "step": 4482, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005342540796846151, + "timestamp": "2025-09-10 02:47:50.811794", + "step": 4483, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:50.865535", + "step": 4483, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007268095854669809, + "timestamp": "2025-09-10 02:47:50.871591", + "step": 4484, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:50.924482", + "step": 4484, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012478465214371681, + "timestamp": "2025-09-10 02:47:50.931016", + "step": 4485, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:50.984044", + "step": 4485, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01635042019188404, + "timestamp": "2025-09-10 02:47:50.986440", + "step": 4486, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:51.040629", + "step": 4486, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008009052835404873, + "timestamp": "2025-09-10 02:47:51.042989", + "step": 4487, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:51.096545", + "step": 4487, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009476982988417149, + "timestamp": "2025-09-10 02:47:51.103613", + "step": 4488, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:51.156347", + "step": 4488, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029068950098007917, + "timestamp": "2025-09-10 02:47:51.162506", + "step": 4489, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:51.217044", + "step": 4489, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013975396286696196, + "timestamp": "2025-09-10 02:47:51.226856", + "step": 4490, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:51.280672", + "step": 4490, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0050372472032904625, + "timestamp": "2025-09-10 02:47:51.283128", + "step": 4491, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:51.337786", + "step": 4491, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002294789766892791, + "timestamp": "2025-09-10 02:47:51.348077", + "step": 4492, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:47:51.414146", + "step": 4492, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007329504587687552, + "timestamp": "2025-09-10 02:47:51.427400", + "step": 4493, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:47:51.489710", + "step": 4493, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010436500422656536, + "timestamp": "2025-09-10 02:47:51.500631", + "step": 4494, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:51.554762", + "step": 4494, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012955855345353484, + "timestamp": "2025-09-10 02:47:51.557071", + "step": 4495, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:51.615039", + "step": 4495, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005135711748152971, + "timestamp": "2025-09-10 02:47:51.626232", + "step": 4496, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:51.680058", + "step": 4496, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01051540020853281, + "timestamp": "2025-09-10 02:47:51.682602", + "step": 4497, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:51.736962", + "step": 4497, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002059691585600376, + "timestamp": "2025-09-10 02:47:51.740034", + "step": 4498, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:51.794259", + "step": 4498, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026808774564415216, + "timestamp": "2025-09-10 02:47:51.800331", + "step": 4499, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:51.853902", + "step": 4499, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028993524610996246, + "timestamp": "2025-09-10 02:47:51.861017", + "step": 4500, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 4500", + "timestamp": "2025-09-10 02:47:52.316495", + "step": 4500, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:52.375381", + "step": 4500, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004151905421167612, + "timestamp": "2025-09-10 02:47:52.385609", + "step": 4501, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:52.441643", + "step": 4501, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008255732245743275, + "timestamp": "2025-09-10 02:47:52.443853", + "step": 4502, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:52.498955", + "step": 4502, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004449177533388138, + "timestamp": "2025-09-10 02:47:52.508718", + "step": 4503, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:52.562780", + "step": 4503, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021090207155793905, + "timestamp": "2025-09-10 02:47:52.568994", + "step": 4504, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:52.622805", + "step": 4504, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027156206779181957, + "timestamp": "2025-09-10 02:47:52.624931", + "step": 4505, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:52.678005", + "step": 4505, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007852545008063316, + "timestamp": "2025-09-10 02:47:52.680663", + "step": 4506, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:47:52.733333", + "step": 4506, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015431736595928669, + "timestamp": "2025-09-10 02:47:52.735688", + "step": 4507, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:47:52.797065", + "step": 4507, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024575102142989635, + "timestamp": "2025-09-10 02:47:52.808919", + "step": 4508, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:52.862394", + "step": 4508, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0045385705307126045, + "timestamp": "2025-09-10 02:47:52.864641", + "step": 4509, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:52.919201", + "step": 4509, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011222070083022118, + "timestamp": "2025-09-10 02:47:52.928979", + "step": 4510, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:52.982985", + "step": 4510, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019330204231664538, + "timestamp": "2025-09-10 02:47:52.985931", + "step": 4511, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:53.040068", + "step": 4511, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0077409460209310055, + "timestamp": "2025-09-10 02:47:53.046634", + "step": 4512, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:53.099528", + "step": 4512, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020350662525743246, + "timestamp": "2025-09-10 02:47:53.102428", + "step": 4513, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:47:53.163653", + "step": 4513, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008962714113295078, + "timestamp": "2025-09-10 02:47:53.174537", + "step": 4514, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:53.233054", + "step": 4514, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005773603916168213, + "timestamp": "2025-09-10 02:47:53.235525", + "step": 4515, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:53.290255", + "step": 4515, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005459383130073547, + "timestamp": "2025-09-10 02:47:53.296732", + "step": 4516, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:53.350979", + "step": 4516, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025894774589687586, + "timestamp": "2025-09-10 02:47:53.353330", + "step": 4517, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:47:53.413894", + "step": 4517, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017120438860729337, + "timestamp": "2025-09-10 02:47:53.423494", + "step": 4518, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:53.481695", + "step": 4518, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028368426486849785, + "timestamp": "2025-09-10 02:47:53.483848", + "step": 4519, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:53.537150", + "step": 4519, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00105516635812819, + "timestamp": "2025-09-10 02:47:53.543305", + "step": 4520, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:47:53.609654", + "step": 4520, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007711261510848999, + "timestamp": "2025-09-10 02:47:53.621409", + "step": 4521, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:53.675707", + "step": 4521, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012892426457256079, + "timestamp": "2025-09-10 02:47:53.678340", + "step": 4522, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:53.733422", + "step": 4522, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004775486886501312, + "timestamp": "2025-09-10 02:47:53.739571", + "step": 4523, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:53.800958", + "step": 4523, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003999762120656669, + "timestamp": "2025-09-10 02:47:53.808036", + "step": 4524, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:53.861900", + "step": 4524, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004046570393256843, + "timestamp": "2025-09-10 02:47:53.867983", + "step": 4525, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:53.921685", + "step": 4525, + "epoch": 2 + }, + { + "type": "loss", + "content": 8.798386988928542e-05, + "timestamp": "2025-09-10 02:47:53.924127", + "step": 4526, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:53.979318", + "step": 4526, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021559493616223335, + "timestamp": "2025-09-10 02:47:53.981596", + "step": 4527, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:54.036351", + "step": 4527, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032169115729629993, + "timestamp": "2025-09-10 02:47:54.042721", + "step": 4528, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:54.099529", + "step": 4528, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032137926667928696, + "timestamp": "2025-09-10 02:47:54.101669", + "step": 4529, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:54.155066", + "step": 4529, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012281983159482479, + "timestamp": "2025-09-10 02:47:54.157497", + "step": 4530, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:54.211395", + "step": 4530, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012799688847735524, + "timestamp": "2025-09-10 02:47:54.213781", + "step": 4531, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:54.267229", + "step": 4531, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017885107081383467, + "timestamp": "2025-09-10 02:47:54.276003", + "step": 4532, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:54.329359", + "step": 4532, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023809725418686867, + "timestamp": "2025-09-10 02:47:54.339831", + "step": 4533, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:54.393415", + "step": 4533, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007906203158199787, + "timestamp": "2025-09-10 02:47:54.397160", + "step": 4534, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:54.451596", + "step": 4534, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025535523891448975, + "timestamp": "2025-09-10 02:47:54.454018", + "step": 4535, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:54.506763", + "step": 4535, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009981702081859112, + "timestamp": "2025-09-10 02:47:54.512813", + "step": 4536, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:47:54.569669", + "step": 4536, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011019224300980568, + "timestamp": "2025-09-10 02:47:54.580813", + "step": 4537, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:54.635269", + "step": 4537, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006600281689316034, + "timestamp": "2025-09-10 02:47:54.637584", + "step": 4538, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:54.691005", + "step": 4538, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014997757971286774, + "timestamp": "2025-09-10 02:47:54.693746", + "step": 4539, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:54.748166", + "step": 4539, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03488391265273094, + "timestamp": "2025-09-10 02:47:54.754490", + "step": 4540, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:47:54.807900", + "step": 4540, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03410203754901886, + "timestamp": "2025-09-10 02:47:54.814001", + "step": 4541, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:54.867686", + "step": 4541, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005616106558591127, + "timestamp": "2025-09-10 02:47:54.869947", + "step": 4542, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:54.923566", + "step": 4542, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002888392424210906, + "timestamp": "2025-09-10 02:47:54.925786", + "step": 4543, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:47:54.986884", + "step": 4543, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003357083071023226, + "timestamp": "2025-09-10 02:47:54.998589", + "step": 4544, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:47:55.052090", + "step": 4544, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001123654656112194, + "timestamp": "2025-09-10 02:47:55.054538", + "step": 4545, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:55.107650", + "step": 4545, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016921380534768105, + "timestamp": "2025-09-10 02:47:55.109962", + "step": 4546, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:47:55.163094", + "step": 4546, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00217248173430562, + "timestamp": "2025-09-10 02:47:55.165938", + "step": 4547, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:47:55.238831", + "step": 4547, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025616373866796494, + "timestamp": "2025-09-10 02:47:55.253038", + "step": 4548, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:55.307367", + "step": 4548, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028280389960855246, + "timestamp": "2025-09-10 02:47:55.309594", + "step": 4549, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:47:55.363361", + "step": 4549, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00040746951708570123, + "timestamp": "2025-09-10 02:47:55.365659", + "step": 4550, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:47:55.420347", + "step": 4550, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00039629219099879265, + "timestamp": "2025-09-10 02:47:55.430149", + "step": 4551, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:47:55.484181", + "step": 4551, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00037533658905886114, + "timestamp": "2025-09-10 02:47:55.490277", + "step": 4552, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:47:55.543180", + "step": 4552, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002133527770638466, + "timestamp": "2025-09-10 02:47:55.551319", + "step": 4553, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:47:55.605657", + "step": 4553, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0039528352208435535, + "timestamp": "2025-09-10 02:47:55.608146", + "step": 4554, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:47:55.661503", + "step": 4554, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00594740454107523, + "timestamp": "2025-09-10 02:47:55.664112", + "step": 4555, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:47:55.717314", + "step": 4555, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005808423273265362, + "timestamp": "2025-09-10 02:47:55.723678", + "step": 4556, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:47:55.777210", + "step": 4556, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002037533326074481, + "timestamp": "2025-09-10 02:47:55.779289", + "step": 4557, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:48:12.783083", + "step": 4557, + "epoch": 2 + }, + { + "type": "pplx", + "content": 25457045.702036124, + "timestamp": "2025-09-10 02:48:12.786430", + "step": 4557, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:12.841461", + "step": 4557, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013372708344832063, + "timestamp": "2025-09-10 02:48:12.843615", + "step": 4558, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:12.897776", + "step": 4558, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003109157260041684, + "timestamp": "2025-09-10 02:48:12.900285", + "step": 4559, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:12.953566", + "step": 4559, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015070393681526184, + "timestamp": "2025-09-10 02:48:12.960039", + "step": 4560, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:13.013580", + "step": 4560, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005185488611459732, + "timestamp": "2025-09-10 02:48:13.016046", + "step": 4561, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:13.069234", + "step": 4561, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019084502127952874, + "timestamp": "2025-09-10 02:48:13.072021", + "step": 4562, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:13.125914", + "step": 4562, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009027626365423203, + "timestamp": "2025-09-10 02:48:13.135561", + "step": 4563, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:13.188957", + "step": 4563, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008088427712209523, + "timestamp": "2025-09-10 02:48:13.195039", + "step": 4564, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:13.247718", + "step": 4564, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0062088086269795895, + "timestamp": "2025-09-10 02:48:13.254192", + "step": 4565, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:13.308462", + "step": 4565, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019485211232677102, + "timestamp": "2025-09-10 02:48:13.311277", + "step": 4566, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:13.365492", + "step": 4566, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016602884279564023, + "timestamp": "2025-09-10 02:48:13.368452", + "step": 4567, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:13.421602", + "step": 4567, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0047590769827365875, + "timestamp": "2025-09-10 02:48:13.427489", + "step": 4568, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:13.480485", + "step": 4568, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018140958854928613, + "timestamp": "2025-09-10 02:48:13.482995", + "step": 4569, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:13.536373", + "step": 4569, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002656914002727717, + "timestamp": "2025-09-10 02:48:13.538599", + "step": 4570, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:13.592165", + "step": 4570, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005687407101504505, + "timestamp": "2025-09-10 02:48:13.594770", + "step": 4571, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:13.648266", + "step": 4571, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03455723077058792, + "timestamp": "2025-09-10 02:48:13.657289", + "step": 4572, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:13.710169", + "step": 4572, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022473115473985672, + "timestamp": "2025-09-10 02:48:13.713135", + "step": 4573, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:13.766542", + "step": 4573, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03626299276947975, + "timestamp": "2025-09-10 02:48:13.769665", + "step": 4574, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:13.822892", + "step": 4574, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009046868421137333, + "timestamp": "2025-09-10 02:48:13.825215", + "step": 4575, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:13.878591", + "step": 4575, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004490011197049171, + "timestamp": "2025-09-10 02:48:13.884666", + "step": 4576, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:13.937358", + "step": 4576, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014431804418563843, + "timestamp": "2025-09-10 02:48:13.947611", + "step": 4577, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:14.002526", + "step": 4577, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01755792647600174, + "timestamp": "2025-09-10 02:48:14.004836", + "step": 4578, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:14.058529", + "step": 4578, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009076729184016585, + "timestamp": "2025-09-10 02:48:14.061292", + "step": 4579, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:14.114465", + "step": 4579, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002662291517481208, + "timestamp": "2025-09-10 02:48:14.120625", + "step": 4580, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:14.173975", + "step": 4580, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013915960444137454, + "timestamp": "2025-09-10 02:48:14.180639", + "step": 4581, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:14.234678", + "step": 4581, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004539549816399813, + "timestamp": "2025-09-10 02:48:14.241045", + "step": 4582, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:14.294925", + "step": 4582, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00540183624252677, + "timestamp": "2025-09-10 02:48:14.304562", + "step": 4583, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:14.358173", + "step": 4583, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001248289947398007, + "timestamp": "2025-09-10 02:48:14.364415", + "step": 4584, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:14.416709", + "step": 4584, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003251541347708553, + "timestamp": "2025-09-10 02:48:14.418898", + "step": 4585, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:48:14.473044", + "step": 4585, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0060363272204995155, + "timestamp": "2025-09-10 02:48:14.475205", + "step": 4586, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:14.527786", + "step": 4586, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009328167070634663, + "timestamp": "2025-09-10 02:48:14.530051", + "step": 4587, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:48:14.603351", + "step": 4587, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024946587160229683, + "timestamp": "2025-09-10 02:48:14.617833", + "step": 4588, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:14.671049", + "step": 4588, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025679387617856264, + "timestamp": "2025-09-10 02:48:14.677694", + "step": 4589, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:14.730941", + "step": 4589, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016521627083420753, + "timestamp": "2025-09-10 02:48:14.734152", + "step": 4590, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:14.787113", + "step": 4590, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009985518408939242, + "timestamp": "2025-09-10 02:48:14.789353", + "step": 4591, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:14.842617", + "step": 4591, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015227339463308454, + "timestamp": "2025-09-10 02:48:14.848595", + "step": 4592, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:48:14.915137", + "step": 4592, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00046674496843479574, + "timestamp": "2025-09-10 02:48:14.928780", + "step": 4593, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:14.982447", + "step": 4593, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017164714518003166, + "timestamp": "2025-09-10 02:48:14.984934", + "step": 4594, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:15.038909", + "step": 4594, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0115129379555583, + "timestamp": "2025-09-10 02:48:15.041370", + "step": 4595, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:15.094916", + "step": 4595, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01486434880644083, + "timestamp": "2025-09-10 02:48:15.101132", + "step": 4596, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:15.153942", + "step": 4596, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007304795552045107, + "timestamp": "2025-09-10 02:48:15.161906", + "step": 4597, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:15.216088", + "step": 4597, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001580969081260264, + "timestamp": "2025-09-10 02:48:15.218240", + "step": 4598, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:48:15.276675", + "step": 4598, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038990986067801714, + "timestamp": "2025-09-10 02:48:15.287065", + "step": 4599, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:48:15.342447", + "step": 4599, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.045212119817733765, + "timestamp": "2025-09-10 02:48:15.348986", + "step": 4600, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:15.402806", + "step": 4600, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011401698924601078, + "timestamp": "2025-09-10 02:48:15.405241", + "step": 4601, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:15.458613", + "step": 4601, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001965171832125634, + "timestamp": "2025-09-10 02:48:15.460821", + "step": 4602, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 9920060287936.0 + }, + "timestamp": "2025-09-10 02:48:15.535232", + "step": 4602, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004868685267865658, + "timestamp": "2025-09-10 02:48:15.549158", + "step": 4603, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:15.603224", + "step": 4603, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001720248837955296, + "timestamp": "2025-09-10 02:48:15.610345", + "step": 4604, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:15.663532", + "step": 4604, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006380091421306133, + "timestamp": "2025-09-10 02:48:15.669676", + "step": 4605, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:15.722482", + "step": 4605, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000943073071539402, + "timestamp": "2025-09-10 02:48:15.724874", + "step": 4606, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:48:15.778402", + "step": 4606, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018165066838264465, + "timestamp": "2025-09-10 02:48:15.780882", + "step": 4607, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:15.834454", + "step": 4607, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00024254433810710907, + "timestamp": "2025-09-10 02:48:15.840739", + "step": 4608, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:48:15.907229", + "step": 4608, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012426173780113459, + "timestamp": "2025-09-10 02:48:15.920808", + "step": 4609, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:15.977184", + "step": 4609, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008221525349654257, + "timestamp": "2025-09-10 02:48:15.983013", + "step": 4610, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:16.037876", + "step": 4610, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019179824739694595, + "timestamp": "2025-09-10 02:48:16.040034", + "step": 4611, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:16.094342", + "step": 4611, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018849697662517428, + "timestamp": "2025-09-10 02:48:16.100518", + "step": 4612, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:16.155728", + "step": 4612, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005814618896692991, + "timestamp": "2025-09-10 02:48:16.158427", + "step": 4613, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:48:16.219164", + "step": 4613, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006916634738445282, + "timestamp": "2025-09-10 02:48:16.230045", + "step": 4614, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:16.283305", + "step": 4614, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006601746194064617, + "timestamp": "2025-09-10 02:48:16.285473", + "step": 4615, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:16.339057", + "step": 4615, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001805769861675799, + "timestamp": "2025-09-10 02:48:16.345074", + "step": 4616, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:16.397654", + "step": 4616, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014912709593772888, + "timestamp": "2025-09-10 02:48:16.399492", + "step": 4617, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:16.452396", + "step": 4617, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016789039364084601, + "timestamp": "2025-09-10 02:48:16.458872", + "step": 4618, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:16.512887", + "step": 4618, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015009819529950619, + "timestamp": "2025-09-10 02:48:16.514944", + "step": 4619, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:48:16.581171", + "step": 4619, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000642686674837023, + "timestamp": "2025-09-10 02:48:16.594187", + "step": 4620, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:16.646979", + "step": 4620, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002797394699882716, + "timestamp": "2025-09-10 02:48:16.649028", + "step": 4621, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:16.701895", + "step": 4621, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00236245128326118, + "timestamp": "2025-09-10 02:48:16.710104", + "step": 4622, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:48:16.767951", + "step": 4622, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010664359433576465, + "timestamp": "2025-09-10 02:48:16.778323", + "step": 4623, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:16.832155", + "step": 4623, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032224238384515047, + "timestamp": "2025-09-10 02:48:16.842546", + "step": 4624, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:16.895455", + "step": 4624, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012770752073265612, + "timestamp": "2025-09-10 02:48:16.897775", + "step": 4625, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:16.951123", + "step": 4625, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004707627522293478, + "timestamp": "2025-09-10 02:48:16.960739", + "step": 4626, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:17.015417", + "step": 4626, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031918175518512726, + "timestamp": "2025-09-10 02:48:17.017831", + "step": 4627, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:48:17.075896", + "step": 4627, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00040983394137583673, + "timestamp": "2025-09-10 02:48:17.087095", + "step": 4628, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:48:17.141273", + "step": 4628, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00035925681004300714, + "timestamp": "2025-09-10 02:48:17.151804", + "step": 4629, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:17.204776", + "step": 4629, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009273489005863667, + "timestamp": "2025-09-10 02:48:17.206902", + "step": 4630, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:48:17.259553", + "step": 4630, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00020392243459355086, + "timestamp": "2025-09-10 02:48:17.261664", + "step": 4631, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:48:17.315539", + "step": 4631, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00037995242746546865, + "timestamp": "2025-09-10 02:48:17.321515", + "step": 4632, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:17.373896", + "step": 4632, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006583416252397001, + "timestamp": "2025-09-10 02:48:17.375962", + "step": 4633, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:17.429133", + "step": 4633, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005158471409231424, + "timestamp": "2025-09-10 02:48:17.431326", + "step": 4634, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:17.484568", + "step": 4634, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025188740342855453, + "timestamp": "2025-09-10 02:48:17.486448", + "step": 4635, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:17.539696", + "step": 4635, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006703072227537632, + "timestamp": "2025-09-10 02:48:17.545776", + "step": 4636, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:17.598134", + "step": 4636, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010374571429565549, + "timestamp": "2025-09-10 02:48:17.604558", + "step": 4637, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:17.657636", + "step": 4637, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00020061612303834409, + "timestamp": "2025-09-10 02:48:17.659960", + "step": 4638, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:48:17.721362", + "step": 4638, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005455473903566599, + "timestamp": "2025-09-10 02:48:17.732474", + "step": 4639, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:48:17.793523", + "step": 4639, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005772277945652604, + "timestamp": "2025-09-10 02:48:17.805224", + "step": 4640, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:17.857909", + "step": 4640, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000132946006488055, + "timestamp": "2025-09-10 02:48:17.859997", + "step": 4641, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:48:17.920673", + "step": 4641, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029071313329041004, + "timestamp": "2025-09-10 02:48:17.931570", + "step": 4642, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:17.984585", + "step": 4642, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02414119616150856, + "timestamp": "2025-09-10 02:48:17.986756", + "step": 4643, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:18.039395", + "step": 4643, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009457221603952348, + "timestamp": "2025-09-10 02:48:18.045427", + "step": 4644, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:18.097890", + "step": 4644, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003954698913730681, + "timestamp": "2025-09-10 02:48:18.099968", + "step": 4645, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:18.152777", + "step": 4645, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03588508442044258, + "timestamp": "2025-09-10 02:48:18.154863", + "step": 4646, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:18.207590", + "step": 4646, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022157442290335894, + "timestamp": "2025-09-10 02:48:18.209790", + "step": 4647, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:48:18.264206", + "step": 4647, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004063074884470552, + "timestamp": "2025-09-10 02:48:18.274799", + "step": 4648, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:18.327280", + "step": 4648, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003567532985471189, + "timestamp": "2025-09-10 02:48:18.329467", + "step": 4649, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:18.382658", + "step": 4649, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009528659866191447, + "timestamp": "2025-09-10 02:48:18.384789", + "step": 4650, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:18.437897", + "step": 4650, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01627149060368538, + "timestamp": "2025-09-10 02:48:18.444389", + "step": 4651, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:18.497743", + "step": 4651, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009323515696451068, + "timestamp": "2025-09-10 02:48:18.505149", + "step": 4652, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:18.558549", + "step": 4652, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004460910859052092, + "timestamp": "2025-09-10 02:48:18.560882", + "step": 4653, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:48:18.613277", + "step": 4653, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00522157596424222, + "timestamp": "2025-09-10 02:48:18.615378", + "step": 4654, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:18.669237", + "step": 4654, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0289036575704813, + "timestamp": "2025-09-10 02:48:18.671873", + "step": 4655, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:18.724852", + "step": 4655, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002348503563553095, + "timestamp": "2025-09-10 02:48:18.730606", + "step": 4656, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:18.783259", + "step": 4656, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012411413714289665, + "timestamp": "2025-09-10 02:48:18.791544", + "step": 4657, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:18.845196", + "step": 4657, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018182327039539814, + "timestamp": "2025-09-10 02:48:18.847437", + "step": 4658, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:18.900685", + "step": 4658, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004816818982362747, + "timestamp": "2025-09-10 02:48:18.903024", + "step": 4659, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:18.956289", + "step": 4659, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005066471057944, + "timestamp": "2025-09-10 02:48:18.965176", + "step": 4660, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:48:19.021462", + "step": 4660, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009116308647207916, + "timestamp": "2025-09-10 02:48:19.032691", + "step": 4661, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:19.086617", + "step": 4661, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004365415778011084, + "timestamp": "2025-09-10 02:48:19.088768", + "step": 4662, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:48:19.154999", + "step": 4662, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002365898573771119, + "timestamp": "2025-09-10 02:48:19.167211", + "step": 4663, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:19.220232", + "step": 4663, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018584148492664099, + "timestamp": "2025-09-10 02:48:19.226112", + "step": 4664, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:48:19.297487", + "step": 4664, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016713660210371017, + "timestamp": "2025-09-10 02:48:19.312346", + "step": 4665, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:19.365392", + "step": 4665, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005889121908694506, + "timestamp": "2025-09-10 02:48:19.368182", + "step": 4666, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:19.421471", + "step": 4666, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004000083077698946, + "timestamp": "2025-09-10 02:48:19.423896", + "step": 4667, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:19.477187", + "step": 4667, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011162602342665195, + "timestamp": "2025-09-10 02:48:19.483272", + "step": 4668, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:19.535333", + "step": 4668, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012390923220664263, + "timestamp": "2025-09-10 02:48:19.537946", + "step": 4669, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:19.590800", + "step": 4669, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01437478419393301, + "timestamp": "2025-09-10 02:48:19.594610", + "step": 4670, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:19.650163", + "step": 4670, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00072527612792328, + "timestamp": "2025-09-10 02:48:19.658289", + "step": 4671, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:19.711292", + "step": 4671, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011226385831832886, + "timestamp": "2025-09-10 02:48:19.718713", + "step": 4672, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:19.771107", + "step": 4672, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017921874532476068, + "timestamp": "2025-09-10 02:48:19.773790", + "step": 4673, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:48:19.826000", + "step": 4673, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002091957489028573, + "timestamp": "2025-09-10 02:48:19.828295", + "step": 4674, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:19.881403", + "step": 4674, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037636582273989916, + "timestamp": "2025-09-10 02:48:19.883590", + "step": 4675, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:48:19.938231", + "step": 4675, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002588507952168584, + "timestamp": "2025-09-10 02:48:19.948843", + "step": 4676, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:48:20.001702", + "step": 4676, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01437581516802311, + "timestamp": "2025-09-10 02:48:20.003893", + "step": 4677, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:20.056603", + "step": 4677, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011934679932892323, + "timestamp": "2025-09-10 02:48:20.058674", + "step": 4678, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:48:20.118779", + "step": 4678, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010685628280043602, + "timestamp": "2025-09-10 02:48:20.129538", + "step": 4679, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:48:20.190072", + "step": 4679, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011842173989862204, + "timestamp": "2025-09-10 02:48:20.201578", + "step": 4680, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:20.254373", + "step": 4680, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002382299571763724, + "timestamp": "2025-09-10 02:48:20.256678", + "step": 4681, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:20.309801", + "step": 4681, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007638778071850538, + "timestamp": "2025-09-10 02:48:20.318192", + "step": 4682, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:20.371375", + "step": 4682, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008221337338909507, + "timestamp": "2025-09-10 02:48:20.377835", + "step": 4683, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:48:20.440046", + "step": 4683, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001319772214628756, + "timestamp": "2025-09-10 02:48:20.451903", + "step": 4684, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:48:20.523253", + "step": 4684, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011567100882530212, + "timestamp": "2025-09-10 02:48:20.537856", + "step": 4685, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:20.591333", + "step": 4685, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013520864304155111, + "timestamp": "2025-09-10 02:48:20.593695", + "step": 4686, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:20.646258", + "step": 4686, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019370978698134422, + "timestamp": "2025-09-10 02:48:20.648643", + "step": 4687, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:48:20.706814", + "step": 4687, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00015971229004207999, + "timestamp": "2025-09-10 02:48:20.718001", + "step": 4688, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:20.770816", + "step": 4688, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033498455304652452, + "timestamp": "2025-09-10 02:48:20.777381", + "step": 4689, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:20.830537", + "step": 4689, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008158961310982704, + "timestamp": "2025-09-10 02:48:20.832545", + "step": 4690, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:20.886314", + "step": 4690, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00515906885266304, + "timestamp": "2025-09-10 02:48:20.895930", + "step": 4691, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:20.951705", + "step": 4691, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014080966357141733, + "timestamp": "2025-09-10 02:48:20.957417", + "step": 4692, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:21.009766", + "step": 4692, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003290967782959342, + "timestamp": "2025-09-10 02:48:21.011805", + "step": 4693, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:21.064731", + "step": 4693, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00014995354285929352, + "timestamp": "2025-09-10 02:48:21.066872", + "step": 4694, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:48:21.139643", + "step": 4694, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011130459606647491, + "timestamp": "2025-09-10 02:48:21.153137", + "step": 4695, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:21.206891", + "step": 4695, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019666498701553792, + "timestamp": "2025-09-10 02:48:21.212765", + "step": 4696, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:21.265304", + "step": 4696, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002782369265332818, + "timestamp": "2025-09-10 02:48:21.267426", + "step": 4697, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:48:21.325709", + "step": 4697, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003994309343397617, + "timestamp": "2025-09-10 02:48:21.336151", + "step": 4698, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 8960054460160.0 + }, + "timestamp": "2025-09-10 02:48:21.406141", + "step": 4698, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012797446688637137, + "timestamp": "2025-09-10 02:48:21.419021", + "step": 4699, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:21.472635", + "step": 4699, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034619185607880354, + "timestamp": "2025-09-10 02:48:21.478307", + "step": 4700, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:21.533445", + "step": 4700, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008053782512433827, + "timestamp": "2025-09-10 02:48:21.536448", + "step": 4701, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:21.588821", + "step": 4701, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00028124029631726444, + "timestamp": "2025-09-10 02:48:21.591950", + "step": 4702, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:21.645124", + "step": 4702, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019142018631100655, + "timestamp": "2025-09-10 02:48:21.647362", + "step": 4703, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:21.700464", + "step": 4703, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001146473572589457, + "timestamp": "2025-09-10 02:48:21.706252", + "step": 4704, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:48:38.513132", + "step": 4704, + "epoch": 2 + }, + { + "type": "pplx", + "content": 26641143.515721954, + "timestamp": "2025-09-10 02:48:38.515988", + "step": 4704, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:38.570906", + "step": 4704, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001589340390637517, + "timestamp": "2025-09-10 02:48:38.575623", + "step": 4705, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:38.630044", + "step": 4705, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007871562265790999, + "timestamp": "2025-09-10 02:48:38.632106", + "step": 4706, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:48:38.692299", + "step": 4706, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006433350499719381, + "timestamp": "2025-09-10 02:48:38.702963", + "step": 4707, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:38.756896", + "step": 4707, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000366387510439381, + "timestamp": "2025-09-10 02:48:38.763217", + "step": 4708, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 13120079713856.0 + }, + "timestamp": "2025-09-10 02:48:38.857429", + "step": 4708, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033369511365890503, + "timestamp": "2025-09-10 02:48:38.877658", + "step": 4709, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:38.932360", + "step": 4709, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001281169825233519, + "timestamp": "2025-09-10 02:48:38.938927", + "step": 4710, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:38.993895", + "step": 4710, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014966449234634638, + "timestamp": "2025-09-10 02:48:38.995985", + "step": 4711, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:39.049666", + "step": 4711, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005338889081031084, + "timestamp": "2025-09-10 02:48:39.055782", + "step": 4712, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:48:39.134232", + "step": 4712, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003673503815662116, + "timestamp": "2025-09-10 02:48:39.150677", + "step": 4713, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 3, + 224 + ], + "flops": 3360020475552.0 + }, + "timestamp": "2025-09-10 02:48:39.205986", + "step": 4713, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00020906708959955722, + "timestamp": "2025-09-10 02:48:39.207936", + "step": 4714, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:39.264899", + "step": 4714, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016857580340001732, + "timestamp": "2025-09-10 02:48:39.267867", + "step": 4715, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:39.320394", + "step": 4715, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032327771186828613, + "timestamp": "2025-09-10 02:48:39.326125", + "step": 4716, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:39.378801", + "step": 4716, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03402525931596756, + "timestamp": "2025-09-10 02:48:39.380921", + "step": 4717, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:39.434058", + "step": 4717, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011595349496928975, + "timestamp": "2025-09-10 02:48:39.436176", + "step": 4718, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:39.489723", + "step": 4718, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.1871651521651074e-05, + "timestamp": "2025-09-10 02:48:39.491662", + "step": 4719, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:39.544442", + "step": 4719, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013515396043658257, + "timestamp": "2025-09-10 02:48:39.550348", + "step": 4720, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:39.602639", + "step": 4720, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05121804028749466, + "timestamp": "2025-09-10 02:48:39.605014", + "step": 4721, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:39.658105", + "step": 4721, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01933814212679863, + "timestamp": "2025-09-10 02:48:39.660338", + "step": 4722, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:39.713685", + "step": 4722, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010275743901729584, + "timestamp": "2025-09-10 02:48:39.719858", + "step": 4723, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:39.774461", + "step": 4723, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006337855011224747, + "timestamp": "2025-09-10 02:48:39.780467", + "step": 4724, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:39.833427", + "step": 4724, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01716405712068081, + "timestamp": "2025-09-10 02:48:39.835423", + "step": 4725, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:39.888207", + "step": 4725, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015138440765440464, + "timestamp": "2025-09-10 02:48:39.890371", + "step": 4726, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:39.943586", + "step": 4726, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028167726122774184, + "timestamp": "2025-09-10 02:48:39.945666", + "step": 4727, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:48:40.003293", + "step": 4727, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019934328156523407, + "timestamp": "2025-09-10 02:48:40.014504", + "step": 4728, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:40.067476", + "step": 4728, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015965728380251676, + "timestamp": "2025-09-10 02:48:40.069652", + "step": 4729, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:40.122719", + "step": 4729, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.707252320367843e-05, + "timestamp": "2025-09-10 02:48:40.124905", + "step": 4730, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:40.178909", + "step": 4730, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008025681599974632, + "timestamp": "2025-09-10 02:48:40.188479", + "step": 4731, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:40.241130", + "step": 4731, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008100520935840905, + "timestamp": "2025-09-10 02:48:40.246942", + "step": 4732, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:40.299419", + "step": 4732, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005326542071998119, + "timestamp": "2025-09-10 02:48:40.305849", + "step": 4733, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:40.358973", + "step": 4733, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006007250631228089, + "timestamp": "2025-09-10 02:48:40.361107", + "step": 4734, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:40.414473", + "step": 4734, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01471441239118576, + "timestamp": "2025-09-10 02:48:40.420575", + "step": 4735, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:40.475641", + "step": 4735, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024760139058344066, + "timestamp": "2025-09-10 02:48:40.481652", + "step": 4736, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:40.534767", + "step": 4736, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013098502531647682, + "timestamp": "2025-09-10 02:48:40.537024", + "step": 4737, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:40.590018", + "step": 4737, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006232666200958192, + "timestamp": "2025-09-10 02:48:40.592019", + "step": 4738, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:40.645300", + "step": 4738, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004891558084636927, + "timestamp": "2025-09-10 02:48:40.647507", + "step": 4739, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:40.700569", + "step": 4739, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015102754696272314, + "timestamp": "2025-09-10 02:48:40.706721", + "step": 4740, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:40.759339", + "step": 4740, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001452394062653184, + "timestamp": "2025-09-10 02:48:40.761392", + "step": 4741, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:48:40.814396", + "step": 4741, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003562500060070306, + "timestamp": "2025-09-10 02:48:40.816627", + "step": 4742, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:40.870131", + "step": 4742, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06023650988936424, + "timestamp": "2025-09-10 02:48:40.872446", + "step": 4743, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:48:40.932716", + "step": 4743, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.696308603044599e-05, + "timestamp": "2025-09-10 02:48:40.944199", + "step": 4744, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:40.997185", + "step": 4744, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001707561023067683, + "timestamp": "2025-09-10 02:48:40.999383", + "step": 4745, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:41.052586", + "step": 4745, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002241963054984808, + "timestamp": "2025-09-10 02:48:41.054886", + "step": 4746, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:41.107942", + "step": 4746, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00402917992323637, + "timestamp": "2025-09-10 02:48:41.110134", + "step": 4747, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:41.162795", + "step": 4747, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.023892248049378395, + "timestamp": "2025-09-10 02:48:41.168730", + "step": 4748, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:41.221394", + "step": 4748, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004089613794349134, + "timestamp": "2025-09-10 02:48:41.227304", + "step": 4749, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:48:41.280072", + "step": 4749, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010775496251881123, + "timestamp": "2025-09-10 02:48:41.282379", + "step": 4750, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:41.335293", + "step": 4750, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013157210196368396, + "timestamp": "2025-09-10 02:48:41.337692", + "step": 4751, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:48:41.391002", + "step": 4751, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024290860164910555, + "timestamp": "2025-09-10 02:48:41.396782", + "step": 4752, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:41.449340", + "step": 4752, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014459015801548958, + "timestamp": "2025-09-10 02:48:41.451467", + "step": 4753, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:41.504000", + "step": 4753, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003907266538590193, + "timestamp": "2025-09-10 02:48:41.506067", + "step": 4754, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:41.559211", + "step": 4754, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007426393683999777, + "timestamp": "2025-09-10 02:48:41.561377", + "step": 4755, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:41.614304", + "step": 4755, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004347166686784476, + "timestamp": "2025-09-10 02:48:41.620532", + "step": 4756, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:48:41.677486", + "step": 4756, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002476779278367758, + "timestamp": "2025-09-10 02:48:41.688646", + "step": 4757, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:41.742612", + "step": 4757, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012231871369294822, + "timestamp": "2025-09-10 02:48:41.744689", + "step": 4758, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:41.798254", + "step": 4758, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04061596468091011, + "timestamp": "2025-09-10 02:48:41.800371", + "step": 4759, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:41.854046", + "step": 4759, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006013612262904644, + "timestamp": "2025-09-10 02:48:41.859798", + "step": 4760, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:41.912840", + "step": 4760, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035146118607372046, + "timestamp": "2025-09-10 02:48:41.919208", + "step": 4761, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:41.972288", + "step": 4761, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005703885108232498, + "timestamp": "2025-09-10 02:48:41.974351", + "step": 4762, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:42.026976", + "step": 4762, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011439550435170531, + "timestamp": "2025-09-10 02:48:42.035067", + "step": 4763, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:42.088533", + "step": 4763, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.025727814063429832, + "timestamp": "2025-09-10 02:48:42.097406", + "step": 4764, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:48:42.150715", + "step": 4764, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00408810842782259, + "timestamp": "2025-09-10 02:48:42.153030", + "step": 4765, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 928 + ], + "flops": 18560112737920.0 + }, + "timestamp": "2025-09-10 02:48:42.284640", + "step": 4765, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004506801720708609, + "timestamp": "2025-09-10 02:48:42.310447", + "step": 4766, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:42.363972", + "step": 4766, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010416925884783268, + "timestamp": "2025-09-10 02:48:42.366035", + "step": 4767, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:42.419009", + "step": 4767, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001127948984503746, + "timestamp": "2025-09-10 02:48:42.426244", + "step": 4768, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:42.479884", + "step": 4768, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012796514667570591, + "timestamp": "2025-09-10 02:48:42.482024", + "step": 4769, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:48:42.548201", + "step": 4769, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016542102093808353, + "timestamp": "2025-09-10 02:48:42.560393", + "step": 4770, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:48:42.613267", + "step": 4770, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020784097723662853, + "timestamp": "2025-09-10 02:48:42.615556", + "step": 4771, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:42.668980", + "step": 4771, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010669670067727566, + "timestamp": "2025-09-10 02:48:42.674674", + "step": 4772, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:42.727133", + "step": 4772, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004837530665099621, + "timestamp": "2025-09-10 02:48:42.733482", + "step": 4773, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:42.786881", + "step": 4773, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010128377471119165, + "timestamp": "2025-09-10 02:48:42.789885", + "step": 4774, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:42.843563", + "step": 4774, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03209507092833519, + "timestamp": "2025-09-10 02:48:42.853196", + "step": 4775, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:42.906169", + "step": 4775, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008237884379923344, + "timestamp": "2025-09-10 02:48:42.911991", + "step": 4776, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:42.964468", + "step": 4776, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006681117229163647, + "timestamp": "2025-09-10 02:48:42.967455", + "step": 4777, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:43.020293", + "step": 4777, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007087221019901335, + "timestamp": "2025-09-10 02:48:43.022491", + "step": 4778, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:48:43.091403", + "step": 4778, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005340483039617538, + "timestamp": "2025-09-10 02:48:43.103976", + "step": 4779, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:43.156963", + "step": 4779, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003317827358841896, + "timestamp": "2025-09-10 02:48:43.162744", + "step": 4780, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:43.215071", + "step": 4780, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00177658477332443, + "timestamp": "2025-09-10 02:48:43.217339", + "step": 4781, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:43.270264", + "step": 4781, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03338702768087387, + "timestamp": "2025-09-10 02:48:43.272567", + "step": 4782, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:43.325768", + "step": 4782, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010354549624025822, + "timestamp": "2025-09-10 02:48:43.328690", + "step": 4783, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:48:43.397582", + "step": 4783, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037773135118186474, + "timestamp": "2025-09-10 02:48:43.410929", + "step": 4784, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:43.463686", + "step": 4784, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000469871360110119, + "timestamp": "2025-09-10 02:48:43.465777", + "step": 4785, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:48:43.520547", + "step": 4785, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008968977257609367, + "timestamp": "2025-09-10 02:48:43.530295", + "step": 4786, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:43.583455", + "step": 4786, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.035639386624097824, + "timestamp": "2025-09-10 02:48:43.585607", + "step": 4787, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:43.639006", + "step": 4787, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011272707488387823, + "timestamp": "2025-09-10 02:48:43.644729", + "step": 4788, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:43.697313", + "step": 4788, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010113457217812538, + "timestamp": "2025-09-10 02:48:43.705439", + "step": 4789, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:48:43.772010", + "step": 4789, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008517818641848862, + "timestamp": "2025-09-10 02:48:43.784231", + "step": 4790, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:43.837947", + "step": 4790, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006375905591994524, + "timestamp": "2025-09-10 02:48:43.840061", + "step": 4791, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:48:43.892767", + "step": 4791, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003986644558608532, + "timestamp": "2025-09-10 02:48:43.898724", + "step": 4792, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:48:43.955072", + "step": 4792, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004787076264619827, + "timestamp": "2025-09-10 02:48:43.966260", + "step": 4793, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:48:44.032554", + "step": 4793, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003805638989433646, + "timestamp": "2025-09-10 02:48:44.044754", + "step": 4794, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:44.098384", + "step": 4794, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030001818668097258, + "timestamp": "2025-09-10 02:48:44.100607", + "step": 4795, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:44.154296", + "step": 4795, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013813378289341927, + "timestamp": "2025-09-10 02:48:44.164711", + "step": 4796, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:44.217660", + "step": 4796, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0305621474981308, + "timestamp": "2025-09-10 02:48:44.224116", + "step": 4797, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:44.277105", + "step": 4797, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004716162104159594, + "timestamp": "2025-09-10 02:48:44.279603", + "step": 4798, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:44.332918", + "step": 4798, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004502739757299423, + "timestamp": "2025-09-10 02:48:44.335169", + "step": 4799, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:48:44.396447", + "step": 4799, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013405581936240196, + "timestamp": "2025-09-10 02:48:44.407837", + "step": 4800, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:44.461533", + "step": 4800, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004024644382297993, + "timestamp": "2025-09-10 02:48:44.463595", + "step": 4801, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:44.517223", + "step": 4801, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016606238204985857, + "timestamp": "2025-09-10 02:48:44.519464", + "step": 4802, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 9920060287936.0 + }, + "timestamp": "2025-09-10 02:48:44.594682", + "step": 4802, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006496304180473089, + "timestamp": "2025-09-10 02:48:44.608584", + "step": 4803, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:44.662108", + "step": 4803, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014956875704228878, + "timestamp": "2025-09-10 02:48:44.667994", + "step": 4804, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:44.720593", + "step": 4804, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029476068448275328, + "timestamp": "2025-09-10 02:48:44.722551", + "step": 4805, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:48:44.780268", + "step": 4805, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006885406328365207, + "timestamp": "2025-09-10 02:48:44.790778", + "step": 4806, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:44.844346", + "step": 4806, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019481521099805832, + "timestamp": "2025-09-10 02:48:44.846498", + "step": 4807, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:44.899681", + "step": 4807, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005319296615198255, + "timestamp": "2025-09-10 02:48:44.905422", + "step": 4808, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:48:44.977358", + "step": 4808, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003924211021512747, + "timestamp": "2025-09-10 02:48:44.992279", + "step": 4809, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:45.045418", + "step": 4809, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008448751643300056, + "timestamp": "2025-09-10 02:48:45.047512", + "step": 4810, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:45.100544", + "step": 4810, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014836247079074383, + "timestamp": "2025-09-10 02:48:45.103671", + "step": 4811, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:45.156795", + "step": 4811, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006818660767748952, + "timestamp": "2025-09-10 02:48:45.162477", + "step": 4812, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:45.215072", + "step": 4812, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029938272200524807, + "timestamp": "2025-09-10 02:48:45.216999", + "step": 4813, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:45.270379", + "step": 4813, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004919454921036959, + "timestamp": "2025-09-10 02:48:45.272307", + "step": 4814, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:48:45.325671", + "step": 4814, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012119380757212639, + "timestamp": "2025-09-10 02:48:45.327728", + "step": 4815, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:45.380672", + "step": 4815, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0178728885948658, + "timestamp": "2025-09-10 02:48:45.386766", + "step": 4816, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:45.439324", + "step": 4816, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.025599464774131775, + "timestamp": "2025-09-10 02:48:45.441909", + "step": 4817, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:45.495879", + "step": 4817, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018236581236124039, + "timestamp": "2025-09-10 02:48:45.505326", + "step": 4818, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:45.560401", + "step": 4818, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005904998979531229, + "timestamp": "2025-09-10 02:48:45.566948", + "step": 4819, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:45.623007", + "step": 4819, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014317615423351526, + "timestamp": "2025-09-10 02:48:45.630938", + "step": 4820, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:45.687356", + "step": 4820, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009754039347171783, + "timestamp": "2025-09-10 02:48:45.689500", + "step": 4821, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:45.744222", + "step": 4821, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008487998507916927, + "timestamp": "2025-09-10 02:48:45.746330", + "step": 4822, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:45.800206", + "step": 4822, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.020899279043078423, + "timestamp": "2025-09-10 02:48:45.805770", + "step": 4823, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:45.859906", + "step": 4823, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005726119852624834, + "timestamp": "2025-09-10 02:48:45.866380", + "step": 4824, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:48:45.919037", + "step": 4824, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001260301098227501, + "timestamp": "2025-09-10 02:48:45.929480", + "step": 4825, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:45.983276", + "step": 4825, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008040931425057352, + "timestamp": "2025-09-10 02:48:45.985599", + "step": 4826, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:46.039115", + "step": 4826, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002123631536960602, + "timestamp": "2025-09-10 02:48:46.041009", + "step": 4827, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:46.093781", + "step": 4827, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0033926458563655615, + "timestamp": "2025-09-10 02:48:46.099769", + "step": 4828, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:46.152405", + "step": 4828, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02737729251384735, + "timestamp": "2025-09-10 02:48:46.154637", + "step": 4829, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:46.207606", + "step": 4829, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016665494767948985, + "timestamp": "2025-09-10 02:48:46.209917", + "step": 4830, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:48:46.268398", + "step": 4830, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0068314154632389545, + "timestamp": "2025-09-10 02:48:46.278827", + "step": 4831, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:48:46.332506", + "step": 4831, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.024365218356251717, + "timestamp": "2025-09-10 02:48:46.338584", + "step": 4832, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:48:46.392005", + "step": 4832, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02425851859152317, + "timestamp": "2025-09-10 02:48:46.402425", + "step": 4833, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:48:46.460371", + "step": 4833, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010536868358030915, + "timestamp": "2025-09-10 02:48:46.470770", + "step": 4834, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:46.524530", + "step": 4834, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006592122372239828, + "timestamp": "2025-09-10 02:48:46.534163", + "step": 4835, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:46.587279", + "step": 4835, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006792788044549525, + "timestamp": "2025-09-10 02:48:46.594377", + "step": 4836, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:46.647217", + "step": 4836, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01451940555125475, + "timestamp": "2025-09-10 02:48:46.649648", + "step": 4837, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:48:46.703048", + "step": 4837, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004953315947204828, + "timestamp": "2025-09-10 02:48:46.710962", + "step": 4838, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:48:46.764220", + "step": 4838, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0067435139790177345, + "timestamp": "2025-09-10 02:48:46.767234", + "step": 4839, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:48:46.820777", + "step": 4839, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026406829711049795, + "timestamp": "2025-09-10 02:48:46.827753", + "step": 4840, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:48:46.888591", + "step": 4840, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005143395159393549, + "timestamp": "2025-09-10 02:48:46.900672", + "step": 4841, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:48:46.953799", + "step": 4841, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0041872295551002026, + "timestamp": "2025-09-10 02:48:46.956005", + "step": 4842, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:48:47.010985", + "step": 4842, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008241079631261528, + "timestamp": "2025-09-10 02:48:47.020779", + "step": 4843, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:48:47.073983", + "step": 4843, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004373212810605764, + "timestamp": "2025-09-10 02:48:47.080286", + "step": 4844, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:47.134141", + "step": 4844, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009122295305132866, + "timestamp": "2025-09-10 02:48:47.136447", + "step": 4845, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:48:47.189606", + "step": 4845, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0046147494576871395, + "timestamp": "2025-09-10 02:48:47.191913", + "step": 4846, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:48:47.245612", + "step": 4846, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003591356333345175, + "timestamp": "2025-09-10 02:48:47.247665", + "step": 4847, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:48:47.300939", + "step": 4847, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0074340240098536015, + "timestamp": "2025-09-10 02:48:47.307203", + "step": 4848, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:47.360981", + "step": 4848, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001226637396030128, + "timestamp": "2025-09-10 02:48:47.370276", + "step": 4849, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:48:47.424521", + "step": 4849, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021831209305673838, + "timestamp": "2025-09-10 02:48:47.426955", + "step": 4850, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:48:47.481216", + "step": 4850, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013943219557404518, + "timestamp": "2025-09-10 02:48:47.490835", + "step": 4851, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:49:04.366381", + "step": 4851, + "epoch": 3 + }, + { + "type": "pplx", + "content": 24295117.98900996, + "timestamp": "2025-09-10 02:49:04.369527", + "step": 4851, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:04.425386", + "step": 4851, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014960489934310317, + "timestamp": "2025-09-10 02:49:04.431915", + "step": 4852, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:04.485742", + "step": 4852, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007174062775447965, + "timestamp": "2025-09-10 02:49:04.488064", + "step": 4853, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:04.541693", + "step": 4853, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016206667060032487, + "timestamp": "2025-09-10 02:49:04.544139", + "step": 4854, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:49:04.604845", + "step": 4854, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01007130742073059, + "timestamp": "2025-09-10 02:49:04.615540", + "step": 4855, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:04.669571", + "step": 4855, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004926626104861498, + "timestamp": "2025-09-10 02:49:04.675892", + "step": 4856, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:04.729198", + "step": 4856, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002526621101424098, + "timestamp": "2025-09-10 02:49:04.731545", + "step": 4857, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:04.785083", + "step": 4857, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022045696096029133, + "timestamp": "2025-09-10 02:49:04.788045", + "step": 4858, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:04.842506", + "step": 4858, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036227802047505975, + "timestamp": "2025-09-10 02:49:04.848676", + "step": 4859, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:04.903731", + "step": 4859, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006822288502007723, + "timestamp": "2025-09-10 02:49:04.910075", + "step": 4860, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:04.964079", + "step": 4860, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0033081471920013428, + "timestamp": "2025-09-10 02:49:04.969912", + "step": 4861, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:05.023789", + "step": 4861, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016923480434343219, + "timestamp": "2025-09-10 02:49:05.026615", + "step": 4862, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:05.080912", + "step": 4862, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007131525198929012, + "timestamp": "2025-09-10 02:49:05.083524", + "step": 4863, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:05.139873", + "step": 4863, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001632165745832026, + "timestamp": "2025-09-10 02:49:05.146707", + "step": 4864, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:49:05.206915", + "step": 4864, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004180103540420532, + "timestamp": "2025-09-10 02:49:05.218636", + "step": 4865, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:05.275303", + "step": 4865, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026041711680591106, + "timestamp": "2025-09-10 02:49:05.279647", + "step": 4866, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:49:05.333279", + "step": 4866, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0217380840331316, + "timestamp": "2025-09-10 02:49:05.342910", + "step": 4867, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:05.400225", + "step": 4867, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028752118814736605, + "timestamp": "2025-09-10 02:49:05.411655", + "step": 4868, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:05.465552", + "step": 4868, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047668040497228503, + "timestamp": "2025-09-10 02:49:05.476105", + "step": 4869, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:05.530876", + "step": 4869, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003315207315608859, + "timestamp": "2025-09-10 02:49:05.536099", + "step": 4870, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:49:05.592808", + "step": 4870, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009684692486189306, + "timestamp": "2025-09-10 02:49:05.595043", + "step": 4871, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:05.652537", + "step": 4871, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004432265181094408, + "timestamp": "2025-09-10 02:49:05.662877", + "step": 4872, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:05.717028", + "step": 4872, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006341114640235901, + "timestamp": "2025-09-10 02:49:05.727531", + "step": 4873, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:05.781377", + "step": 4873, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015499379951506853, + "timestamp": "2025-09-10 02:49:05.783672", + "step": 4874, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:05.836538", + "step": 4874, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006216306239366531, + "timestamp": "2025-09-10 02:49:05.838673", + "step": 4875, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:05.891506", + "step": 4875, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010528519051149487, + "timestamp": "2025-09-10 02:49:05.897741", + "step": 4876, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:05.951359", + "step": 4876, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003758539678528905, + "timestamp": "2025-09-10 02:49:05.954406", + "step": 4877, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:06.008998", + "step": 4877, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003981443587690592, + "timestamp": "2025-09-10 02:49:06.011388", + "step": 4878, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:06.066153", + "step": 4878, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017698196461424232, + "timestamp": "2025-09-10 02:49:06.071234", + "step": 4879, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:06.131813", + "step": 4879, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004895442631095648, + "timestamp": "2025-09-10 02:49:06.138809", + "step": 4880, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:06.191355", + "step": 4880, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026843196246773005, + "timestamp": "2025-09-10 02:49:06.201363", + "step": 4881, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:49:06.268037", + "step": 4881, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005960598355159163, + "timestamp": "2025-09-10 02:49:06.278715", + "step": 4882, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:49:06.337112", + "step": 4882, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0051751406863331795, + "timestamp": "2025-09-10 02:49:06.347527", + "step": 4883, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:06.401433", + "step": 4883, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045295237214304507, + "timestamp": "2025-09-10 02:49:06.407739", + "step": 4884, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:06.460418", + "step": 4884, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009586476953700185, + "timestamp": "2025-09-10 02:49:06.462574", + "step": 4885, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:06.517293", + "step": 4885, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002113823313266039, + "timestamp": "2025-09-10 02:49:06.527090", + "step": 4886, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:49:06.590260", + "step": 4886, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005936753004789352, + "timestamp": "2025-09-10 02:49:06.601021", + "step": 4887, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:06.654695", + "step": 4887, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013274262892082334, + "timestamp": "2025-09-10 02:49:06.660630", + "step": 4888, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:06.718881", + "step": 4888, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005245764274150133, + "timestamp": "2025-09-10 02:49:06.720957", + "step": 4889, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:06.775784", + "step": 4889, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001503094914369285, + "timestamp": "2025-09-10 02:49:06.785528", + "step": 4890, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:06.838239", + "step": 4890, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000990046188235283, + "timestamp": "2025-09-10 02:49:06.840670", + "step": 4891, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 12160073886080.0 + }, + "timestamp": "2025-09-10 02:49:06.931321", + "step": 4891, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016281692078337073, + "timestamp": "2025-09-10 02:49:06.949262", + "step": 4892, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:07.004095", + "step": 4892, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026771408738568425, + "timestamp": "2025-09-10 02:49:07.014624", + "step": 4893, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:07.068436", + "step": 4893, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003569223335944116, + "timestamp": "2025-09-10 02:49:07.070747", + "step": 4894, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:07.127781", + "step": 4894, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002908846829086542, + "timestamp": "2025-09-10 02:49:07.130092", + "step": 4895, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:49:07.190617", + "step": 4895, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000855074729770422, + "timestamp": "2025-09-10 02:49:07.202090", + "step": 4896, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:49:07.262260", + "step": 4896, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017130032181739807, + "timestamp": "2025-09-10 02:49:07.274307", + "step": 4897, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:07.327874", + "step": 4897, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000767638732213527, + "timestamp": "2025-09-10 02:49:07.330218", + "step": 4898, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:07.383228", + "step": 4898, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016307687386870384, + "timestamp": "2025-09-10 02:49:07.385377", + "step": 4899, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:07.438024", + "step": 4899, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028657265938818455, + "timestamp": "2025-09-10 02:49:07.443790", + "step": 4900, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:07.496198", + "step": 4900, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.104109681677073e-05, + "timestamp": "2025-09-10 02:49:07.498572", + "step": 4901, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:07.551374", + "step": 4901, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004295418504625559, + "timestamp": "2025-09-10 02:49:07.558076", + "step": 4902, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:07.611613", + "step": 4902, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006380677223205566, + "timestamp": "2025-09-10 02:49:07.613998", + "step": 4903, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:07.667679", + "step": 4903, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020270653476472944, + "timestamp": "2025-09-10 02:49:07.673467", + "step": 4904, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:07.725601", + "step": 4904, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031658937223255634, + "timestamp": "2025-09-10 02:49:07.729020", + "step": 4905, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:49:07.791154", + "step": 4905, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007741497363895178, + "timestamp": "2025-09-10 02:49:07.802090", + "step": 4906, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:07.854893", + "step": 4906, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006203799042850733, + "timestamp": "2025-09-10 02:49:07.857862", + "step": 4907, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:07.910992", + "step": 4907, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005209214286878705, + "timestamp": "2025-09-10 02:49:07.916932", + "step": 4908, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:07.969235", + "step": 4908, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008703137864358723, + "timestamp": "2025-09-10 02:49:07.971407", + "step": 4909, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:08.023941", + "step": 4909, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03252455219626427, + "timestamp": "2025-09-10 02:49:08.027113", + "step": 4910, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:08.079919", + "step": 4910, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003201315994374454, + "timestamp": "2025-09-10 02:49:08.086527", + "step": 4911, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:08.139698", + "step": 4911, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007507981499657035, + "timestamp": "2025-09-10 02:49:08.145643", + "step": 4912, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:08.197933", + "step": 4912, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.024523703381419182, + "timestamp": "2025-09-10 02:49:08.200011", + "step": 4913, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:08.253142", + "step": 4913, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.382607534760609e-05, + "timestamp": "2025-09-10 02:49:08.261405", + "step": 4914, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:08.315062", + "step": 4914, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006074230768717825, + "timestamp": "2025-09-10 02:49:08.317886", + "step": 4915, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:08.371470", + "step": 4915, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013656097871717066, + "timestamp": "2025-09-10 02:49:08.377207", + "step": 4916, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:08.431408", + "step": 4916, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.140331530943513e-05, + "timestamp": "2025-09-10 02:49:08.433606", + "step": 4917, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:08.486494", + "step": 4917, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008503682911396027, + "timestamp": "2025-09-10 02:49:08.489682", + "step": 4918, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:08.542174", + "step": 4918, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.4097786333877593e-05, + "timestamp": "2025-09-10 02:49:08.545182", + "step": 4919, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:08.598621", + "step": 4919, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008876653737388551, + "timestamp": "2025-09-10 02:49:08.604363", + "step": 4920, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:08.656771", + "step": 4920, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001713093719445169, + "timestamp": "2025-09-10 02:49:08.658968", + "step": 4921, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:08.712900", + "step": 4921, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036111727240495384, + "timestamp": "2025-09-10 02:49:08.722520", + "step": 4922, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:08.776201", + "step": 4922, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00629039341583848, + "timestamp": "2025-09-10 02:49:08.779114", + "step": 4923, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:08.832124", + "step": 4923, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018623418873175979, + "timestamp": "2025-09-10 02:49:08.837823", + "step": 4924, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:08.890049", + "step": 4924, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023605269379913807, + "timestamp": "2025-09-10 02:49:08.892909", + "step": 4925, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:08.945712", + "step": 4925, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.923226126469672e-05, + "timestamp": "2025-09-10 02:49:08.948289", + "step": 4926, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:09.001102", + "step": 4926, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005650994135066867, + "timestamp": "2025-09-10 02:49:09.003279", + "step": 4927, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:09.056645", + "step": 4927, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001354710548184812, + "timestamp": "2025-09-10 02:49:09.062499", + "step": 4928, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:09.115163", + "step": 4928, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007866756059229374, + "timestamp": "2025-09-10 02:49:09.117259", + "step": 4929, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:09.169987", + "step": 4929, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015856897516641766, + "timestamp": "2025-09-10 02:49:09.172139", + "step": 4930, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:09.224756", + "step": 4930, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006528622470796108, + "timestamp": "2025-09-10 02:49:09.226961", + "step": 4931, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:49:09.284890", + "step": 4931, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00742186838760972, + "timestamp": "2025-09-10 02:49:09.296090", + "step": 4932, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:09.348259", + "step": 4932, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001926262048073113, + "timestamp": "2025-09-10 02:49:09.350470", + "step": 4933, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:09.405137", + "step": 4933, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009631103835999966, + "timestamp": "2025-09-10 02:49:09.414925", + "step": 4934, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:49:09.475685", + "step": 4934, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002042563573922962, + "timestamp": "2025-09-10 02:49:09.486393", + "step": 4935, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:09.539442", + "step": 4935, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008414814947172999, + "timestamp": "2025-09-10 02:49:09.545439", + "step": 4936, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:09.598042", + "step": 4936, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015261145017575473, + "timestamp": "2025-09-10 02:49:09.600107", + "step": 4937, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:09.652779", + "step": 4937, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004089429508894682, + "timestamp": "2025-09-10 02:49:09.654891", + "step": 4938, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:49:09.735060", + "step": 4938, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035414841840974987, + "timestamp": "2025-09-10 02:49:09.750009", + "step": 4939, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:09.803981", + "step": 4939, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018595749512314796, + "timestamp": "2025-09-10 02:49:09.812776", + "step": 4940, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:09.865404", + "step": 4940, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.842367969918996e-05, + "timestamp": "2025-09-10 02:49:09.873540", + "step": 4941, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:09.927169", + "step": 4941, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004058366175740957, + "timestamp": "2025-09-10 02:49:09.929310", + "step": 4942, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:49:09.987517", + "step": 4942, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021893209486734122, + "timestamp": "2025-09-10 02:49:09.997927", + "step": 4943, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:10.052642", + "step": 4943, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018335158238187432, + "timestamp": "2025-09-10 02:49:10.063188", + "step": 4944, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:49:10.128422", + "step": 4944, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025715469382703304, + "timestamp": "2025-09-10 02:49:10.141635", + "step": 4945, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:10.195038", + "step": 4945, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006101075559854507, + "timestamp": "2025-09-10 02:49:10.201469", + "step": 4946, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:10.255393", + "step": 4946, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011807610280811787, + "timestamp": "2025-09-10 02:49:10.265031", + "step": 4947, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:10.318303", + "step": 4947, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.031693945056759e-05, + "timestamp": "2025-09-10 02:49:10.328706", + "step": 4948, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:10.380955", + "step": 4948, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015000006183981895, + "timestamp": "2025-09-10 02:49:10.383200", + "step": 4949, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:10.436444", + "step": 4949, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000620223639998585, + "timestamp": "2025-09-10 02:49:10.438945", + "step": 4950, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:10.492174", + "step": 4950, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006793139036744833, + "timestamp": "2025-09-10 02:49:10.495165", + "step": 4951, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:10.547935", + "step": 4951, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.225173951359466e-05, + "timestamp": "2025-09-10 02:49:10.553726", + "step": 4952, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:10.606381", + "step": 4952, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.438864430587273e-05, + "timestamp": "2025-09-10 02:49:10.608829", + "step": 4953, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:49:10.674891", + "step": 4953, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03761579841375351, + "timestamp": "2025-09-10 02:49:10.687081", + "step": 4954, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:10.740259", + "step": 4954, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.660187839064747e-05, + "timestamp": "2025-09-10 02:49:10.743197", + "step": 4955, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:10.796284", + "step": 4955, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002929875859990716, + "timestamp": "2025-09-10 02:49:10.801990", + "step": 4956, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:10.854372", + "step": 4956, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031684644636698067, + "timestamp": "2025-09-10 02:49:10.856570", + "step": 4957, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:49:10.909171", + "step": 4957, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001288395025767386, + "timestamp": "2025-09-10 02:49:10.911406", + "step": 4958, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:10.965087", + "step": 4958, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015277406200766563, + "timestamp": "2025-09-10 02:49:10.967183", + "step": 4959, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:11.019919", + "step": 4959, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038321936153806746, + "timestamp": "2025-09-10 02:49:11.025599", + "step": 4960, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:11.077688", + "step": 4960, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033514885581098497, + "timestamp": "2025-09-10 02:49:11.080727", + "step": 4961, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:49:11.149013", + "step": 4961, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003143365611322224, + "timestamp": "2025-09-10 02:49:11.161617", + "step": 4962, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:11.215773", + "step": 4962, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028298699762672186, + "timestamp": "2025-09-10 02:49:11.217991", + "step": 4963, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:49:11.276032", + "step": 4963, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010795405978569761, + "timestamp": "2025-09-10 02:49:11.287256", + "step": 4964, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:11.339690", + "step": 4964, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016476513701491058, + "timestamp": "2025-09-10 02:49:11.342009", + "step": 4965, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:11.396455", + "step": 4965, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.192130316165276e-05, + "timestamp": "2025-09-10 02:49:11.406303", + "step": 4966, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:11.459362", + "step": 4966, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.839994547481183e-05, + "timestamp": "2025-09-10 02:49:11.461591", + "step": 4967, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:11.514981", + "step": 4967, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019118456402793527, + "timestamp": "2025-09-10 02:49:11.520730", + "step": 4968, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:11.573564", + "step": 4968, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003673020051792264, + "timestamp": "2025-09-10 02:49:11.575604", + "step": 4969, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:11.628497", + "step": 4969, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025563675444573164, + "timestamp": "2025-09-10 02:49:11.631623", + "step": 4970, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:11.684595", + "step": 4970, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006253737956285477, + "timestamp": "2025-09-10 02:49:11.687528", + "step": 4971, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:11.741234", + "step": 4971, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.789296018425375e-05, + "timestamp": "2025-09-10 02:49:11.747119", + "step": 4972, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:11.799301", + "step": 4972, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.892937770113349e-05, + "timestamp": "2025-09-10 02:49:11.801475", + "step": 4973, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:11.854459", + "step": 4973, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008523422293365002, + "timestamp": "2025-09-10 02:49:11.857621", + "step": 4974, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:49:11.915952", + "step": 4974, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002624673070386052, + "timestamp": "2025-09-10 02:49:11.926340", + "step": 4975, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:11.979223", + "step": 4975, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.09043518453836441, + "timestamp": "2025-09-10 02:49:11.985055", + "step": 4976, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:49:12.037002", + "step": 4976, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.042494259774684906, + "timestamp": "2025-09-10 02:49:12.039072", + "step": 4977, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:12.091943", + "step": 4977, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0104117002338171, + "timestamp": "2025-09-10 02:49:12.100053", + "step": 4978, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:12.153082", + "step": 4978, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007600211538374424, + "timestamp": "2025-09-10 02:49:12.156033", + "step": 4979, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:49:12.213830", + "step": 4979, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011665706988424063, + "timestamp": "2025-09-10 02:49:12.225051", + "step": 4980, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:12.277620", + "step": 4980, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0048175351694226265, + "timestamp": "2025-09-10 02:49:12.280658", + "step": 4981, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:49:12.332840", + "step": 4981, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004247081466019154, + "timestamp": "2025-09-10 02:49:12.335062", + "step": 4982, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:12.387638", + "step": 4982, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05138136073946953, + "timestamp": "2025-09-10 02:49:12.389849", + "step": 4983, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:12.443886", + "step": 4983, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.509895658586174e-05, + "timestamp": "2025-09-10 02:49:12.454422", + "step": 4984, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:12.506171", + "step": 4984, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012163658393546939, + "timestamp": "2025-09-10 02:49:12.508557", + "step": 4985, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:12.560959", + "step": 4985, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004981536767445505, + "timestamp": "2025-09-10 02:49:12.564110", + "step": 4986, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:12.616640", + "step": 4986, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047296658158302307, + "timestamp": "2025-09-10 02:49:12.618848", + "step": 4987, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:12.671397", + "step": 4987, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001558628136990592, + "timestamp": "2025-09-10 02:49:12.677081", + "step": 4988, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:12.729095", + "step": 4988, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.702091478568036e-05, + "timestamp": "2025-09-10 02:49:12.731205", + "step": 4989, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:12.784264", + "step": 4989, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011864184634760022, + "timestamp": "2025-09-10 02:49:12.786384", + "step": 4990, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:49:12.846556", + "step": 4990, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002848926931619644, + "timestamp": "2025-09-10 02:49:12.857284", + "step": 4991, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:12.910135", + "step": 4991, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.387175795272924e-05, + "timestamp": "2025-09-10 02:49:12.916067", + "step": 4992, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:12.968200", + "step": 4992, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013732888328377157, + "timestamp": "2025-09-10 02:49:12.970618", + "step": 4993, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:13.023629", + "step": 4993, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011589375790208578, + "timestamp": "2025-09-10 02:49:13.025843", + "step": 4994, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:13.079100", + "step": 4994, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.879437180235982e-05, + "timestamp": "2025-09-10 02:49:13.088756", + "step": 4995, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:49:13.141190", + "step": 4995, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007933162851259112, + "timestamp": "2025-09-10 02:49:13.146947", + "step": 4996, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:13.199162", + "step": 4996, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00863623432815075, + "timestamp": "2025-09-10 02:49:13.201285", + "step": 4997, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:13.253933", + "step": 4997, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005718530621379614, + "timestamp": "2025-09-10 02:49:13.256121", + "step": 4998, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:49:30.200505", + "step": 4998, + "epoch": 3 + }, + { + "type": "pplx", + "content": 23973688.272886705, + "timestamp": "2025-09-10 02:49:30.204895", + "step": 4998, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:49:30.265785", + "step": 4998, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005250006448477507, + "timestamp": "2025-09-10 02:49:30.269041", + "step": 4999, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:30.323789", + "step": 4999, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.246975969290361e-05, + "timestamp": "2025-09-10 02:49:30.329845", + "step": 5000, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 5000", + "timestamp": "2025-09-10 02:49:30.824877", + "step": 5000, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:30.884237", + "step": 5000, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025869489181786776, + "timestamp": "2025-09-10 02:49:30.887783", + "step": 5001, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:30.942689", + "step": 5001, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045246342779137194, + "timestamp": "2025-09-10 02:49:30.944776", + "step": 5002, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:30.999017", + "step": 5002, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013516661419998854, + "timestamp": "2025-09-10 02:49:31.004280", + "step": 5003, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:31.058092", + "step": 5003, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020484435372054577, + "timestamp": "2025-09-10 02:49:31.064349", + "step": 5004, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:31.117185", + "step": 5004, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.250265480251983e-05, + "timestamp": "2025-09-10 02:49:31.127085", + "step": 5005, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:31.181185", + "step": 5005, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010583539551589638, + "timestamp": "2025-09-10 02:49:31.189358", + "step": 5006, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:31.243400", + "step": 5006, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0494500957429409, + "timestamp": "2025-09-10 02:49:31.249151", + "step": 5007, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:31.302434", + "step": 5007, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012285331031307578, + "timestamp": "2025-09-10 02:49:31.308707", + "step": 5008, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:49:31.367568", + "step": 5008, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00039893705979920924, + "timestamp": "2025-09-10 02:49:31.379183", + "step": 5009, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:49:31.441173", + "step": 5009, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000498550885822624, + "timestamp": "2025-09-10 02:49:31.452294", + "step": 5010, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:31.505685", + "step": 5010, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007543464889749885, + "timestamp": "2025-09-10 02:49:31.507883", + "step": 5011, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:31.561157", + "step": 5011, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007778530125506222, + "timestamp": "2025-09-10 02:49:31.567254", + "step": 5012, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:49:31.631975", + "step": 5012, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005997510743327439, + "timestamp": "2025-09-10 02:49:31.645046", + "step": 5013, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:49:31.699326", + "step": 5013, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007705023162998259, + "timestamp": "2025-09-10 02:49:31.701563", + "step": 5014, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:31.755132", + "step": 5014, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002069078851491213, + "timestamp": "2025-09-10 02:49:31.757665", + "step": 5015, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:31.811469", + "step": 5015, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06012476235628128, + "timestamp": "2025-09-10 02:49:31.818436", + "step": 5016, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:31.871662", + "step": 5016, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005363817908801138, + "timestamp": "2025-09-10 02:49:31.878083", + "step": 5017, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:31.931714", + "step": 5017, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006101771141402423, + "timestamp": "2025-09-10 02:49:31.935321", + "step": 5018, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:31.991494", + "step": 5018, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008906811126507819, + "timestamp": "2025-09-10 02:49:31.998090", + "step": 5019, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:32.051264", + "step": 5019, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037788457120768726, + "timestamp": "2025-09-10 02:49:32.060042", + "step": 5020, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:32.112968", + "step": 5020, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01288488321006298, + "timestamp": "2025-09-10 02:49:32.123456", + "step": 5021, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:32.177243", + "step": 5021, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032534441561438143, + "timestamp": "2025-09-10 02:49:32.180193", + "step": 5022, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:32.233414", + "step": 5022, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004104897961951792, + "timestamp": "2025-09-10 02:49:32.235881", + "step": 5023, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:32.289703", + "step": 5023, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006058880826458335, + "timestamp": "2025-09-10 02:49:32.295749", + "step": 5024, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:32.348316", + "step": 5024, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007953226566314697, + "timestamp": "2025-09-10 02:49:32.350767", + "step": 5025, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:49:32.412534", + "step": 5025, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005587108316831291, + "timestamp": "2025-09-10 02:49:32.423616", + "step": 5026, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:49:32.483797", + "step": 5026, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013259410625323653, + "timestamp": "2025-09-10 02:49:32.494451", + "step": 5027, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:32.549829", + "step": 5027, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001348756835795939, + "timestamp": "2025-09-10 02:49:32.559172", + "step": 5028, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:32.611941", + "step": 5028, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001706228475086391, + "timestamp": "2025-09-10 02:49:32.614138", + "step": 5029, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:32.667144", + "step": 5029, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002383357612416148, + "timestamp": "2025-09-10 02:49:32.669530", + "step": 5030, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:32.723098", + "step": 5030, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029474933398887515, + "timestamp": "2025-09-10 02:49:32.725403", + "step": 5031, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:32.778779", + "step": 5031, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028504797955974936, + "timestamp": "2025-09-10 02:49:32.787605", + "step": 5032, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:32.840467", + "step": 5032, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013638290110975504, + "timestamp": "2025-09-10 02:49:32.842985", + "step": 5033, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:32.895778", + "step": 5033, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.578953566029668e-05, + "timestamp": "2025-09-10 02:49:32.902202", + "step": 5034, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:49:32.968769", + "step": 5034, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013735241955146194, + "timestamp": "2025-09-10 02:49:32.981023", + "step": 5035, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:33.036692", + "step": 5035, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004391853231936693, + "timestamp": "2025-09-10 02:49:33.042972", + "step": 5036, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:33.097545", + "step": 5036, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014596188440918922, + "timestamp": "2025-09-10 02:49:33.108042", + "step": 5037, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:33.161292", + "step": 5037, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002435219706967473, + "timestamp": "2025-09-10 02:49:33.163407", + "step": 5038, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:33.215990", + "step": 5038, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006800977047532797, + "timestamp": "2025-09-10 02:49:33.218873", + "step": 5039, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:33.271378", + "step": 5039, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022466571535915136, + "timestamp": "2025-09-10 02:49:33.277349", + "step": 5040, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:33.330168", + "step": 5040, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.039408233016729355, + "timestamp": "2025-09-10 02:49:33.332275", + "step": 5041, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:33.385025", + "step": 5041, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.263594256481156e-05, + "timestamp": "2025-09-10 02:49:33.391490", + "step": 5042, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:33.444465", + "step": 5042, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003711421159096062, + "timestamp": "2025-09-10 02:49:33.446645", + "step": 5043, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:33.499888", + "step": 5043, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020662827882915735, + "timestamp": "2025-09-10 02:49:33.505891", + "step": 5044, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:33.558962", + "step": 5044, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005662030889652669, + "timestamp": "2025-09-10 02:49:33.561175", + "step": 5045, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:33.614915", + "step": 5045, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003250579466111958, + "timestamp": "2025-09-10 02:49:33.620750", + "step": 5046, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:33.675612", + "step": 5046, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03217030689120293, + "timestamp": "2025-09-10 02:49:33.678133", + "step": 5047, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:33.731912", + "step": 5047, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0071533904410898685, + "timestamp": "2025-09-10 02:49:33.738423", + "step": 5048, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:33.791850", + "step": 5048, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002689636079594493, + "timestamp": "2025-09-10 02:49:33.794323", + "step": 5049, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:33.847226", + "step": 5049, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031585918623022735, + "timestamp": "2025-09-10 02:49:33.849385", + "step": 5050, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:33.902021", + "step": 5050, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002843406517058611, + "timestamp": "2025-09-10 02:49:33.904224", + "step": 5051, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:33.957678", + "step": 5051, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015163521748036146, + "timestamp": "2025-09-10 02:49:33.963737", + "step": 5052, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:34.016455", + "step": 5052, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024538072757422924, + "timestamp": "2025-09-10 02:49:34.019027", + "step": 5053, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:34.072146", + "step": 5053, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005008867592550814, + "timestamp": "2025-09-10 02:49:34.074483", + "step": 5054, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:34.127898", + "step": 5054, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010241649579256773, + "timestamp": "2025-09-10 02:49:34.134387", + "step": 5055, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:49:34.187612", + "step": 5055, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022090300626587123, + "timestamp": "2025-09-10 02:49:34.193868", + "step": 5056, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:34.245943", + "step": 5056, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01904134452342987, + "timestamp": "2025-09-10 02:49:34.248400", + "step": 5057, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:34.301387", + "step": 5057, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023352052085101604, + "timestamp": "2025-09-10 02:49:34.303775", + "step": 5058, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:34.357242", + "step": 5058, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011699604510795325, + "timestamp": "2025-09-10 02:49:34.366789", + "step": 5059, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:34.420427", + "step": 5059, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013018635101616383, + "timestamp": "2025-09-10 02:49:34.427135", + "step": 5060, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:34.481243", + "step": 5060, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003083543269895017, + "timestamp": "2025-09-10 02:49:34.488384", + "step": 5061, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:34.543261", + "step": 5061, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013074075104668736, + "timestamp": "2025-09-10 02:49:34.545932", + "step": 5062, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:34.599589", + "step": 5062, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016092877835035324, + "timestamp": "2025-09-10 02:49:34.601835", + "step": 5063, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:34.654412", + "step": 5063, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013670714572072029, + "timestamp": "2025-09-10 02:49:34.660333", + "step": 5064, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:34.712684", + "step": 5064, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033214528230018914, + "timestamp": "2025-09-10 02:49:34.720892", + "step": 5065, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:34.782473", + "step": 5065, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003066713979933411, + "timestamp": "2025-09-10 02:49:34.784770", + "step": 5066, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:34.841144", + "step": 5066, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003853857342619449, + "timestamp": "2025-09-10 02:49:34.845734", + "step": 5067, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:34.899783", + "step": 5067, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026348425075411797, + "timestamp": "2025-09-10 02:49:34.906111", + "step": 5068, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:34.968630", + "step": 5068, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009287443826906383, + "timestamp": "2025-09-10 02:49:34.978910", + "step": 5069, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:49:35.049323", + "step": 5069, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000837551022414118, + "timestamp": "2025-09-10 02:49:35.059708", + "step": 5070, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:35.118026", + "step": 5070, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006919422303326428, + "timestamp": "2025-09-10 02:49:35.125440", + "step": 5071, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 13120079713856.0 + }, + "timestamp": "2025-09-10 02:49:35.225068", + "step": 5071, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003406129078939557, + "timestamp": "2025-09-10 02:49:35.244392", + "step": 5072, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:35.309614", + "step": 5072, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001750012452248484, + "timestamp": "2025-09-10 02:49:35.330136", + "step": 5073, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:35.386273", + "step": 5073, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000278674706351012, + "timestamp": "2025-09-10 02:49:35.389044", + "step": 5074, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:35.444267", + "step": 5074, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002639829181134701, + "timestamp": "2025-09-10 02:49:35.447791", + "step": 5075, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:35.505019", + "step": 5075, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012780151329934597, + "timestamp": "2025-09-10 02:49:35.511265", + "step": 5076, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:35.572708", + "step": 5076, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004734140355139971, + "timestamp": "2025-09-10 02:49:35.576691", + "step": 5077, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:35.637327", + "step": 5077, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007791816024109721, + "timestamp": "2025-09-10 02:49:35.647430", + "step": 5078, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:35.711728", + "step": 5078, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048580984002910554, + "timestamp": "2025-09-10 02:49:35.713947", + "step": 5079, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:35.771633", + "step": 5079, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017527417512610555, + "timestamp": "2025-09-10 02:49:35.777744", + "step": 5080, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:35.830808", + "step": 5080, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001240090699866414, + "timestamp": "2025-09-10 02:49:35.833980", + "step": 5081, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:35.888008", + "step": 5081, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012745247222483158, + "timestamp": "2025-09-10 02:49:35.890213", + "step": 5082, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:35.943378", + "step": 5082, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005714423605240881, + "timestamp": "2025-09-10 02:49:35.945703", + "step": 5083, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:35.999093", + "step": 5083, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004364429507404566, + "timestamp": "2025-09-10 02:49:36.005279", + "step": 5084, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:36.058944", + "step": 5084, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006739782984368503, + "timestamp": "2025-09-10 02:49:36.061000", + "step": 5085, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:36.114132", + "step": 5085, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015000805258750916, + "timestamp": "2025-09-10 02:49:36.122289", + "step": 5086, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:36.175309", + "step": 5086, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003139723849017173, + "timestamp": "2025-09-10 02:49:36.177362", + "step": 5087, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:36.231217", + "step": 5087, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011213402263820171, + "timestamp": "2025-09-10 02:49:36.237485", + "step": 5088, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:36.290225", + "step": 5088, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011400616494938731, + "timestamp": "2025-09-10 02:49:36.292377", + "step": 5089, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:36.348266", + "step": 5089, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010984824039041996, + "timestamp": "2025-09-10 02:49:36.358022", + "step": 5090, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:49:36.436135", + "step": 5090, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021188571117818356, + "timestamp": "2025-09-10 02:49:36.450188", + "step": 5091, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:36.504344", + "step": 5091, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044342773617245257, + "timestamp": "2025-09-10 02:49:36.510547", + "step": 5092, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:36.563058", + "step": 5092, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027311412850394845, + "timestamp": "2025-09-10 02:49:36.565069", + "step": 5093, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:36.617885", + "step": 5093, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017884830012917519, + "timestamp": "2025-09-10 02:49:36.620141", + "step": 5094, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:36.672704", + "step": 5094, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014469983289018273, + "timestamp": "2025-09-10 02:49:36.674937", + "step": 5095, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:36.727835", + "step": 5095, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005529725458472967, + "timestamp": "2025-09-10 02:49:36.734814", + "step": 5096, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:36.791895", + "step": 5096, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006478400900959969, + "timestamp": "2025-09-10 02:49:36.793897", + "step": 5097, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:36.847210", + "step": 5097, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015148741658776999, + "timestamp": "2025-09-10 02:49:36.853694", + "step": 5098, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:49:36.912311", + "step": 5098, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008421264588832855, + "timestamp": "2025-09-10 02:49:36.922777", + "step": 5099, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:36.979476", + "step": 5099, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008443708647973835, + "timestamp": "2025-09-10 02:49:36.985399", + "step": 5100, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:37.040582", + "step": 5100, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007894421578384936, + "timestamp": "2025-09-10 02:49:37.043043", + "step": 5101, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:49:37.103138", + "step": 5101, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032058203942142427, + "timestamp": "2025-09-10 02:49:37.113851", + "step": 5102, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:37.167893", + "step": 5102, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037115856539458036, + "timestamp": "2025-09-10 02:49:37.171343", + "step": 5103, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:37.227299", + "step": 5103, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011774562299251556, + "timestamp": "2025-09-10 02:49:37.233368", + "step": 5104, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:37.286911", + "step": 5104, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008899167296476662, + "timestamp": "2025-09-10 02:49:37.294993", + "step": 5105, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:37.350669", + "step": 5105, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008432075381278992, + "timestamp": "2025-09-10 02:49:37.353535", + "step": 5106, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:37.406386", + "step": 5106, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05210445076227188, + "timestamp": "2025-09-10 02:49:37.412589", + "step": 5107, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:37.466468", + "step": 5107, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000361327693099156, + "timestamp": "2025-09-10 02:49:37.478643", + "step": 5108, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:37.532028", + "step": 5108, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025633556651882827, + "timestamp": "2025-09-10 02:49:37.534060", + "step": 5109, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:37.588570", + "step": 5109, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002622759493533522, + "timestamp": "2025-09-10 02:49:37.598333", + "step": 5110, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:37.651638", + "step": 5110, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011523003922775388, + "timestamp": "2025-09-10 02:49:37.653826", + "step": 5111, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:37.709778", + "step": 5111, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017187556659337133, + "timestamp": "2025-09-10 02:49:37.715754", + "step": 5112, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:37.770046", + "step": 5112, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024591197725385427, + "timestamp": "2025-09-10 02:49:37.772102", + "step": 5113, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:37.824796", + "step": 5113, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047810334945097566, + "timestamp": "2025-09-10 02:49:37.827250", + "step": 5114, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:37.881585", + "step": 5114, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005543294828385115, + "timestamp": "2025-09-10 02:49:37.883870", + "step": 5115, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:37.937084", + "step": 5115, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003660088637843728, + "timestamp": "2025-09-10 02:49:37.944317", + "step": 5116, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:37.997548", + "step": 5116, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012102748733013868, + "timestamp": "2025-09-10 02:49:38.008053", + "step": 5117, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:38.061182", + "step": 5117, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013141623639967293, + "timestamp": "2025-09-10 02:49:38.063455", + "step": 5118, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:38.116627", + "step": 5118, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004812480474356562, + "timestamp": "2025-09-10 02:49:38.119063", + "step": 5119, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:38.171901", + "step": 5119, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001012549502775073, + "timestamp": "2025-09-10 02:49:38.177969", + "step": 5120, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:38.230770", + "step": 5120, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002350731287151575, + "timestamp": "2025-09-10 02:49:38.232896", + "step": 5121, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:38.286052", + "step": 5121, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006704625557176769, + "timestamp": "2025-09-10 02:49:38.295599", + "step": 5122, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:38.350779", + "step": 5122, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005529317422769964, + "timestamp": "2025-09-10 02:49:38.360523", + "step": 5123, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:38.414538", + "step": 5123, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003123534144833684, + "timestamp": "2025-09-10 02:49:38.420821", + "step": 5124, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:38.473361", + "step": 5124, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021540414541959763, + "timestamp": "2025-09-10 02:49:38.475574", + "step": 5125, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:38.528528", + "step": 5125, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003263267921283841, + "timestamp": "2025-09-10 02:49:38.530986", + "step": 5126, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:38.584117", + "step": 5126, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001052754931151867, + "timestamp": "2025-09-10 02:49:38.586157", + "step": 5127, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:38.639186", + "step": 5127, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006556085427291691, + "timestamp": "2025-09-10 02:49:38.645681", + "step": 5128, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:38.699377", + "step": 5128, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0044709183275699615, + "timestamp": "2025-09-10 02:49:38.701597", + "step": 5129, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:38.755212", + "step": 5129, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005360793438740075, + "timestamp": "2025-09-10 02:49:38.761761", + "step": 5130, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:38.815166", + "step": 5130, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006453011883422732, + "timestamp": "2025-09-10 02:49:38.821525", + "step": 5131, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:38.875459", + "step": 5131, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035264246980659664, + "timestamp": "2025-09-10 02:49:38.881786", + "step": 5132, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:38.934396", + "step": 5132, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012222418561577797, + "timestamp": "2025-09-10 02:49:38.940847", + "step": 5133, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:38.994385", + "step": 5133, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026795705780386925, + "timestamp": "2025-09-10 02:49:39.000928", + "step": 5134, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:39.053772", + "step": 5134, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008503907360136509, + "timestamp": "2025-09-10 02:49:39.061450", + "step": 5135, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:39.114124", + "step": 5135, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025715657975524664, + "timestamp": "2025-09-10 02:49:39.122672", + "step": 5136, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:39.175209", + "step": 5136, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005679276655428112, + "timestamp": "2025-09-10 02:49:39.181455", + "step": 5137, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:39.234465", + "step": 5137, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01844605803489685, + "timestamp": "2025-09-10 02:49:39.237375", + "step": 5138, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:39.290362", + "step": 5138, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000656695629004389, + "timestamp": "2025-09-10 02:49:39.292589", + "step": 5139, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:39.345960", + "step": 5139, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009054274414665997, + "timestamp": "2025-09-10 02:49:39.351903", + "step": 5140, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:39.404838", + "step": 5140, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0066103278659284115, + "timestamp": "2025-09-10 02:49:39.406875", + "step": 5141, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:39.459287", + "step": 5141, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015177327441051602, + "timestamp": "2025-09-10 02:49:39.465887", + "step": 5142, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:39.520352", + "step": 5142, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022303589503280818, + "timestamp": "2025-09-10 02:49:39.528285", + "step": 5143, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:39.581502", + "step": 5143, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025539478519931436, + "timestamp": "2025-09-10 02:49:39.588646", + "step": 5144, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:39.641524", + "step": 5144, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026020356453955173, + "timestamp": "2025-09-10 02:49:39.651413", + "step": 5145, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:49:56.557882", + "step": 5145, + "epoch": 3 + }, + { + "type": "pplx", + "content": 20796887.005170308, + "timestamp": "2025-09-10 02:49:56.560640", + "step": 5145, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:49:56.614034", + "step": 5145, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010840559843927622, + "timestamp": "2025-09-10 02:49:56.616063", + "step": 5146, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:56.669698", + "step": 5146, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04474133253097534, + "timestamp": "2025-09-10 02:49:56.671897", + "step": 5147, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:56.730900", + "step": 5147, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0041691334918141365, + "timestamp": "2025-09-10 02:49:56.737974", + "step": 5148, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:56.793665", + "step": 5148, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019682837591972202, + "timestamp": "2025-09-10 02:49:56.795627", + "step": 5149, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:56.848385", + "step": 5149, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014704102650284767, + "timestamp": "2025-09-10 02:49:56.850619", + "step": 5150, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:56.907156", + "step": 5150, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.698314923094586e-05, + "timestamp": "2025-09-10 02:49:56.916709", + "step": 5151, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:49:56.977645", + "step": 5151, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.029849465936422348, + "timestamp": "2025-09-10 02:49:56.988885", + "step": 5152, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:57.042645", + "step": 5152, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014911588514223695, + "timestamp": "2025-09-10 02:49:57.044668", + "step": 5153, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:57.097245", + "step": 5153, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010748127242550254, + "timestamp": "2025-09-10 02:49:57.103951", + "step": 5154, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:57.157188", + "step": 5154, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036259947228245437, + "timestamp": "2025-09-10 02:49:57.159313", + "step": 5155, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:57.212485", + "step": 5155, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001834212918765843, + "timestamp": "2025-09-10 02:49:57.218823", + "step": 5156, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:57.272654", + "step": 5156, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004516570770647377, + "timestamp": "2025-09-10 02:49:57.274763", + "step": 5157, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:57.327689", + "step": 5157, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00803940836340189, + "timestamp": "2025-09-10 02:49:57.330069", + "step": 5158, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:49:57.391438", + "step": 5158, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007134335581213236, + "timestamp": "2025-09-10 02:49:57.402545", + "step": 5159, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:57.455351", + "step": 5159, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.022051816806197166, + "timestamp": "2025-09-10 02:49:57.461261", + "step": 5160, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:57.513678", + "step": 5160, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005428260774351656, + "timestamp": "2025-09-10 02:49:57.516241", + "step": 5161, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:57.581565", + "step": 5161, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.055736882146448e-05, + "timestamp": "2025-09-10 02:49:57.591376", + "step": 5162, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:57.644310", + "step": 5162, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029876951593905687, + "timestamp": "2025-09-10 02:49:57.652319", + "step": 5163, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:57.705694", + "step": 5163, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01764310896396637, + "timestamp": "2025-09-10 02:49:57.715231", + "step": 5164, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:57.772716", + "step": 5164, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04342194274067879, + "timestamp": "2025-09-10 02:49:57.774656", + "step": 5165, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:57.826886", + "step": 5165, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001361071947030723, + "timestamp": "2025-09-10 02:49:57.829071", + "step": 5166, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:57.882040", + "step": 5166, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007704917225055397, + "timestamp": "2025-09-10 02:49:57.884018", + "step": 5167, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:49:57.938939", + "step": 5167, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030907695181667805, + "timestamp": "2025-09-10 02:49:57.949519", + "step": 5168, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:58.010035", + "step": 5168, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008091972209513187, + "timestamp": "2025-09-10 02:49:58.016219", + "step": 5169, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:58.072195", + "step": 5169, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026753931306302547, + "timestamp": "2025-09-10 02:49:58.074017", + "step": 5170, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:58.130740", + "step": 5170, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003592865541577339, + "timestamp": "2025-09-10 02:49:58.133734", + "step": 5171, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:58.186135", + "step": 5171, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043954423745162785, + "timestamp": "2025-09-10 02:49:58.191987", + "step": 5172, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:58.246496", + "step": 5172, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014983757864683867, + "timestamp": "2025-09-10 02:49:58.249424", + "step": 5173, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:58.302483", + "step": 5173, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003581056371331215, + "timestamp": "2025-09-10 02:49:58.304496", + "step": 5174, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:58.356780", + "step": 5174, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002480275579728186, + "timestamp": "2025-09-10 02:49:58.364613", + "step": 5175, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:58.421891", + "step": 5175, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011433314066380262, + "timestamp": "2025-09-10 02:49:58.428837", + "step": 5176, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:49:58.485399", + "step": 5176, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000771212566178292, + "timestamp": "2025-09-10 02:49:58.496647", + "step": 5177, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:49:58.552360", + "step": 5177, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0605146549642086, + "timestamp": "2025-09-10 02:49:58.561916", + "step": 5178, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:58.621368", + "step": 5178, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00782932247966528, + "timestamp": "2025-09-10 02:49:58.623300", + "step": 5179, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:58.676099", + "step": 5179, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001462681801058352, + "timestamp": "2025-09-10 02:49:58.682330", + "step": 5180, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:58.737098", + "step": 5180, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009882143698632717, + "timestamp": "2025-09-10 02:49:58.739188", + "step": 5181, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:49:58.792230", + "step": 5181, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024378912057727575, + "timestamp": "2025-09-10 02:49:58.800284", + "step": 5182, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:58.853367", + "step": 5182, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007555658230558038, + "timestamp": "2025-09-10 02:49:58.859737", + "step": 5183, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:58.913253", + "step": 5183, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016349944053217769, + "timestamp": "2025-09-10 02:49:58.919201", + "step": 5184, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:58.971661", + "step": 5184, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012841911520808935, + "timestamp": "2025-09-10 02:49:58.978305", + "step": 5185, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:49:59.030423", + "step": 5185, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008138803532347083, + "timestamp": "2025-09-10 02:49:59.032522", + "step": 5186, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:59.084873", + "step": 5186, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005838603712618351, + "timestamp": "2025-09-10 02:49:59.087178", + "step": 5187, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:59.140050", + "step": 5187, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002849035372491926, + "timestamp": "2025-09-10 02:49:59.145860", + "step": 5188, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:49:59.198381", + "step": 5188, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014062098925933242, + "timestamp": "2025-09-10 02:49:59.201128", + "step": 5189, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:59.254421", + "step": 5189, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01566436141729355, + "timestamp": "2025-09-10 02:49:59.256429", + "step": 5190, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:59.308812", + "step": 5190, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013595082564279437, + "timestamp": "2025-09-10 02:49:59.310960", + "step": 5191, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:59.363460", + "step": 5191, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016373340040445328, + "timestamp": "2025-09-10 02:49:59.369093", + "step": 5192, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:59.421187", + "step": 5192, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011580752208828926, + "timestamp": "2025-09-10 02:49:59.427636", + "step": 5193, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:59.480557", + "step": 5193, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003131380653940141, + "timestamp": "2025-09-10 02:49:59.482837", + "step": 5194, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:49:59.535218", + "step": 5194, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005036334623582661, + "timestamp": "2025-09-10 02:49:59.537122", + "step": 5195, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:49:59.589263", + "step": 5195, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003483194450382143, + "timestamp": "2025-09-10 02:49:59.595044", + "step": 5196, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:49:59.647143", + "step": 5196, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005673426203429699, + "timestamp": "2025-09-10 02:49:59.649167", + "step": 5197, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:49:59.701577", + "step": 5197, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017793395090848207, + "timestamp": "2025-09-10 02:49:59.708101", + "step": 5198, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:49:59.760737", + "step": 5198, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01295098103582859, + "timestamp": "2025-09-10 02:49:59.762776", + "step": 5199, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:49:59.829216", + "step": 5199, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029301675967872143, + "timestamp": "2025-09-10 02:49:59.842246", + "step": 5200, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:59.897251", + "step": 5200, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006738324882462621, + "timestamp": "2025-09-10 02:49:59.899397", + "step": 5201, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:49:59.952762", + "step": 5201, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02316547930240631, + "timestamp": "2025-09-10 02:49:59.955069", + "step": 5202, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:00.008209", + "step": 5202, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002208786318078637, + "timestamp": "2025-09-10 02:50:00.011364", + "step": 5203, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:50:00.079789", + "step": 5203, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.032139282673597336, + "timestamp": "2025-09-10 02:50:00.093240", + "step": 5204, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:00.145309", + "step": 5204, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005456877988763154, + "timestamp": "2025-09-10 02:50:00.147393", + "step": 5205, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:50:00.207882", + "step": 5205, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021430044434964657, + "timestamp": "2025-09-10 02:50:00.218550", + "step": 5206, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:00.270891", + "step": 5206, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000990464468486607, + "timestamp": "2025-09-10 02:50:00.274024", + "step": 5207, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:00.326898", + "step": 5207, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004958632052876055, + "timestamp": "2025-09-10 02:50:00.332614", + "step": 5208, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:00.384371", + "step": 5208, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0046188137494027615, + "timestamp": "2025-09-10 02:50:00.386672", + "step": 5209, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:00.439418", + "step": 5209, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005221318919211626, + "timestamp": "2025-09-10 02:50:00.441673", + "step": 5210, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:50:00.493962", + "step": 5210, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004844017326831818, + "timestamp": "2025-09-10 02:50:00.496021", + "step": 5211, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:50:00.564060", + "step": 5211, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002229286590591073, + "timestamp": "2025-09-10 02:50:00.577423", + "step": 5212, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:50:00.629166", + "step": 5212, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018672727746888995, + "timestamp": "2025-09-10 02:50:00.631353", + "step": 5213, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:00.684131", + "step": 5213, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01097164861857891, + "timestamp": "2025-09-10 02:50:00.686296", + "step": 5214, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:00.740038", + "step": 5214, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007613471825607121, + "timestamp": "2025-09-10 02:50:00.742221", + "step": 5215, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:00.795352", + "step": 5215, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011177508858963847, + "timestamp": "2025-09-10 02:50:00.802479", + "step": 5216, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:00.855524", + "step": 5216, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010218006558716297, + "timestamp": "2025-09-10 02:50:00.857834", + "step": 5217, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:00.911164", + "step": 5217, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015347718261182308, + "timestamp": "2025-09-10 02:50:00.917741", + "step": 5218, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:50:00.978650", + "step": 5218, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009092840366065502, + "timestamp": "2025-09-10 02:50:00.989532", + "step": 5219, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:01.042548", + "step": 5219, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.040490929037332535, + "timestamp": "2025-09-10 02:50:01.048814", + "step": 5220, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:01.100726", + "step": 5220, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003533871378749609, + "timestamp": "2025-09-10 02:50:01.103664", + "step": 5221, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:01.158749", + "step": 5221, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028786794282495975, + "timestamp": "2025-09-10 02:50:01.161755", + "step": 5222, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:01.215246", + "step": 5222, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002021850785240531, + "timestamp": "2025-09-10 02:50:01.224853", + "step": 5223, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:01.278956", + "step": 5223, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019813692197203636, + "timestamp": "2025-09-10 02:50:01.289518", + "step": 5224, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:50:01.348659", + "step": 5224, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005793183110654354, + "timestamp": "2025-09-10 02:50:01.360437", + "step": 5225, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:01.414065", + "step": 5225, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012712853029370308, + "timestamp": "2025-09-10 02:50:01.420353", + "step": 5226, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:01.473476", + "step": 5226, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008272496052086353, + "timestamp": "2025-09-10 02:50:01.476388", + "step": 5227, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:50:01.544128", + "step": 5227, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0072872755117714405, + "timestamp": "2025-09-10 02:50:01.557479", + "step": 5228, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:01.609647", + "step": 5228, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005303137004375458, + "timestamp": "2025-09-10 02:50:01.611446", + "step": 5229, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:50:01.672045", + "step": 5229, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010862888302654028, + "timestamp": "2025-09-10 02:50:01.682804", + "step": 5230, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:50:01.741629", + "step": 5230, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03051486611366272, + "timestamp": "2025-09-10 02:50:01.752015", + "step": 5231, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:01.805061", + "step": 5231, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011646350845694542, + "timestamp": "2025-09-10 02:50:01.810819", + "step": 5232, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:01.863905", + "step": 5232, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015296988422051072, + "timestamp": "2025-09-10 02:50:01.872272", + "step": 5233, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:01.925014", + "step": 5233, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009831218980252743, + "timestamp": "2025-09-10 02:50:01.926787", + "step": 5234, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:01.979186", + "step": 5234, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003388639772310853, + "timestamp": "2025-09-10 02:50:01.980949", + "step": 5235, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:02.033555", + "step": 5235, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012169089168310165, + "timestamp": "2025-09-10 02:50:02.039207", + "step": 5236, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:02.091699", + "step": 5236, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015258780913427472, + "timestamp": "2025-09-10 02:50:02.102000", + "step": 5237, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:02.155402", + "step": 5237, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004886474343948066, + "timestamp": "2025-09-10 02:50:02.158507", + "step": 5238, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:02.213982", + "step": 5238, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014229947701096535, + "timestamp": "2025-09-10 02:50:02.216008", + "step": 5239, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:02.268360", + "step": 5239, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.020801449194550514, + "timestamp": "2025-09-10 02:50:02.274068", + "step": 5240, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:02.326221", + "step": 5240, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002947020810097456, + "timestamp": "2025-09-10 02:50:02.328188", + "step": 5241, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:02.380774", + "step": 5241, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006273990031331778, + "timestamp": "2025-09-10 02:50:02.387519", + "step": 5242, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:02.440336", + "step": 5242, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004903948865830898, + "timestamp": "2025-09-10 02:50:02.442432", + "step": 5243, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:02.494775", + "step": 5243, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004805770702660084, + "timestamp": "2025-09-10 02:50:02.500368", + "step": 5244, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:02.552023", + "step": 5244, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008763536461628973, + "timestamp": "2025-09-10 02:50:02.562236", + "step": 5245, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:02.615075", + "step": 5245, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014887356664985418, + "timestamp": "2025-09-10 02:50:02.618089", + "step": 5246, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:50:02.691564", + "step": 5246, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012682790402323008, + "timestamp": "2025-09-10 02:50:02.705229", + "step": 5247, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:02.760656", + "step": 5247, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006945140194147825, + "timestamp": "2025-09-10 02:50:02.766329", + "step": 5248, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:02.818200", + "step": 5248, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011602779850363731, + "timestamp": "2025-09-10 02:50:02.820130", + "step": 5249, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 688 + ], + "flops": 13760083599040.0 + }, + "timestamp": "2025-09-10 02:50:02.919393", + "step": 5249, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011014706688001752, + "timestamp": "2025-09-10 02:50:02.938681", + "step": 5250, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:02.991211", + "step": 5250, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003870258224196732, + "timestamp": "2025-09-10 02:50:02.993245", + "step": 5251, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:03.045795", + "step": 5251, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0027917753905057907, + "timestamp": "2025-09-10 02:50:03.051352", + "step": 5252, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:03.103503", + "step": 5252, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017253583064302802, + "timestamp": "2025-09-10 02:50:03.105529", + "step": 5253, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:03.157895", + "step": 5253, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008569721831008792, + "timestamp": "2025-09-10 02:50:03.160570", + "step": 5254, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:03.214432", + "step": 5254, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019780828151851892, + "timestamp": "2025-09-10 02:50:03.224072", + "step": 5255, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:03.278397", + "step": 5255, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001109988079406321, + "timestamp": "2025-09-10 02:50:03.283956", + "step": 5256, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:03.337577", + "step": 5256, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010690425522625446, + "timestamp": "2025-09-10 02:50:03.339686", + "step": 5257, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:50:03.406882", + "step": 5257, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007719636196270585, + "timestamp": "2025-09-10 02:50:03.419075", + "step": 5258, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:03.474132", + "step": 5258, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013832383789122105, + "timestamp": "2025-09-10 02:50:03.476227", + "step": 5259, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 560 + ], + "flops": 11200068058304.0 + }, + "timestamp": "2025-09-10 02:50:03.559700", + "step": 5259, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005243135965429246, + "timestamp": "2025-09-10 02:50:03.575875", + "step": 5260, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:03.628883", + "step": 5260, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010769384680315852, + "timestamp": "2025-09-10 02:50:03.631037", + "step": 5261, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:03.683458", + "step": 5261, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008988159825094044, + "timestamp": "2025-09-10 02:50:03.690163", + "step": 5262, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:03.745355", + "step": 5262, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001947202137671411, + "timestamp": "2025-09-10 02:50:03.747503", + "step": 5263, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:03.800540", + "step": 5263, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004767652426380664, + "timestamp": "2025-09-10 02:50:03.810382", + "step": 5264, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:03.863074", + "step": 5264, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008745061932131648, + "timestamp": "2025-09-10 02:50:03.873562", + "step": 5265, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:03.927947", + "step": 5265, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003695491177495569, + "timestamp": "2025-09-10 02:50:03.937516", + "step": 5266, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:03.996051", + "step": 5266, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004437750205397606, + "timestamp": "2025-09-10 02:50:03.998048", + "step": 5267, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:04.051394", + "step": 5267, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0095591489225626, + "timestamp": "2025-09-10 02:50:04.061750", + "step": 5268, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:04.113934", + "step": 5268, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005434318445622921, + "timestamp": "2025-09-10 02:50:04.115969", + "step": 5269, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:04.169009", + "step": 5269, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002766565012279898, + "timestamp": "2025-09-10 02:50:04.171104", + "step": 5270, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:04.223379", + "step": 5270, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032608662149868906, + "timestamp": "2025-09-10 02:50:04.225431", + "step": 5271, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:50:04.286624", + "step": 5271, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018476485274732113, + "timestamp": "2025-09-10 02:50:04.298482", + "step": 5272, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:04.350697", + "step": 5272, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.050519172102212906, + "timestamp": "2025-09-10 02:50:04.352929", + "step": 5273, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:04.405265", + "step": 5273, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01290177647024393, + "timestamp": "2025-09-10 02:50:04.407599", + "step": 5274, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:04.460456", + "step": 5274, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00845426507294178, + "timestamp": "2025-09-10 02:50:04.462593", + "step": 5275, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:04.515600", + "step": 5275, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026969367172569036, + "timestamp": "2025-09-10 02:50:04.526002", + "step": 5276, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:50:04.585166", + "step": 5276, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003303184930700809, + "timestamp": "2025-09-10 02:50:04.596920", + "step": 5277, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:04.649658", + "step": 5277, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0027105682529509068, + "timestamp": "2025-09-10 02:50:04.651843", + "step": 5278, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:04.704198", + "step": 5278, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017241363821085542, + "timestamp": "2025-09-10 02:50:04.706185", + "step": 5279, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:04.759121", + "step": 5279, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005337646696716547, + "timestamp": "2025-09-10 02:50:04.768050", + "step": 5280, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:04.820226", + "step": 5280, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004308401548769325, + "timestamp": "2025-09-10 02:50:04.822274", + "step": 5281, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:04.875457", + "step": 5281, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00246291421353817, + "timestamp": "2025-09-10 02:50:04.877576", + "step": 5282, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:04.929980", + "step": 5282, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011630167718976736, + "timestamp": "2025-09-10 02:50:04.932070", + "step": 5283, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:04.984676", + "step": 5283, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007223097491078079, + "timestamp": "2025-09-10 02:50:04.992056", + "step": 5284, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:05.044053", + "step": 5284, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017209595069289207, + "timestamp": "2025-09-10 02:50:05.046108", + "step": 5285, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:05.098883", + "step": 5285, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007841082289814949, + "timestamp": "2025-09-10 02:50:05.100983", + "step": 5286, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:50:05.172825", + "step": 5286, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009586544707417488, + "timestamp": "2025-09-10 02:50:05.186310", + "step": 5287, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:05.241373", + "step": 5287, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012090668082237244, + "timestamp": "2025-09-10 02:50:05.251939", + "step": 5288, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:05.304447", + "step": 5288, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006836647517047822, + "timestamp": "2025-09-10 02:50:05.306611", + "step": 5289, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:05.359060", + "step": 5289, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011419111862778664, + "timestamp": "2025-09-10 02:50:05.361316", + "step": 5290, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:05.413974", + "step": 5290, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009466545889154077, + "timestamp": "2025-09-10 02:50:05.422360", + "step": 5291, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:50:05.480355", + "step": 5291, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.024271532893180847, + "timestamp": "2025-09-10 02:50:05.491536", + "step": 5292, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:50:22.271618", + "step": 5292, + "epoch": 3 + }, + { + "type": "pplx", + "content": 18397125.397900093, + "timestamp": "2025-09-10 02:50:22.274512", + "step": 5292, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:22.327638", + "step": 5292, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011591921793296933, + "timestamp": "2025-09-10 02:50:22.329836", + "step": 5293, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:22.383701", + "step": 5293, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004721366334706545, + "timestamp": "2025-09-10 02:50:22.389465", + "step": 5294, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:22.443783", + "step": 5294, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016440298641100526, + "timestamp": "2025-09-10 02:50:22.446135", + "step": 5295, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:22.499553", + "step": 5295, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005809945520013571, + "timestamp": "2025-09-10 02:50:22.505776", + "step": 5296, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:22.558844", + "step": 5296, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003539426252245903, + "timestamp": "2025-09-10 02:50:22.561192", + "step": 5297, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:22.616786", + "step": 5297, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01256350614130497, + "timestamp": "2025-09-10 02:50:22.626384", + "step": 5298, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:22.681203", + "step": 5298, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010120259830728173, + "timestamp": "2025-09-10 02:50:22.683406", + "step": 5299, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:22.736971", + "step": 5299, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008106070454232395, + "timestamp": "2025-09-10 02:50:22.745796", + "step": 5300, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:22.799346", + "step": 5300, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009015606716275215, + "timestamp": "2025-09-10 02:50:22.801735", + "step": 5301, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:22.855084", + "step": 5301, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003921045456081629, + "timestamp": "2025-09-10 02:50:22.857952", + "step": 5302, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 816 + ], + "flops": 16320099139776.0 + }, + "timestamp": "2025-09-10 02:50:22.981061", + "step": 5302, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001959327346412465, + "timestamp": "2025-09-10 02:50:23.004068", + "step": 5303, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:23.062657", + "step": 5303, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00602575670927763, + "timestamp": "2025-09-10 02:50:23.069013", + "step": 5304, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:23.121788", + "step": 5304, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045856714132241905, + "timestamp": "2025-09-10 02:50:23.128180", + "step": 5305, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:23.182475", + "step": 5305, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004988010041415691, + "timestamp": "2025-09-10 02:50:23.192070", + "step": 5306, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:23.245894", + "step": 5306, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001976949133677408, + "timestamp": "2025-09-10 02:50:23.248123", + "step": 5307, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:23.301395", + "step": 5307, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002062815008684993, + "timestamp": "2025-09-10 02:50:23.307758", + "step": 5308, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:23.360894", + "step": 5308, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031036154832690954, + "timestamp": "2025-09-10 02:50:23.363295", + "step": 5309, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:50:23.426194", + "step": 5309, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016385355265811086, + "timestamp": "2025-09-10 02:50:23.437294", + "step": 5310, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:23.490665", + "step": 5310, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016528499545529485, + "timestamp": "2025-09-10 02:50:23.492734", + "step": 5311, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:23.546085", + "step": 5311, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014376426115632057, + "timestamp": "2025-09-10 02:50:23.552156", + "step": 5312, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:23.604568", + "step": 5312, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029676572885364294, + "timestamp": "2025-09-10 02:50:23.607660", + "step": 5313, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:23.660617", + "step": 5313, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019673847127705812, + "timestamp": "2025-09-10 02:50:23.663009", + "step": 5314, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:23.716001", + "step": 5314, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012650678399950266, + "timestamp": "2025-09-10 02:50:23.718546", + "step": 5315, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:23.772121", + "step": 5315, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004298098385334015, + "timestamp": "2025-09-10 02:50:23.778304", + "step": 5316, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:23.831013", + "step": 5316, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012793357018381357, + "timestamp": "2025-09-10 02:50:23.840855", + "step": 5317, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:23.895615", + "step": 5317, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012616243911907077, + "timestamp": "2025-09-10 02:50:23.905397", + "step": 5318, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:50:23.966927", + "step": 5318, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018142081098631024, + "timestamp": "2025-09-10 02:50:23.977776", + "step": 5319, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:24.032002", + "step": 5319, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012109326053177938, + "timestamp": "2025-09-10 02:50:24.038117", + "step": 5320, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:24.091220", + "step": 5320, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007012794376350939, + "timestamp": "2025-09-10 02:50:24.099098", + "step": 5321, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:24.153444", + "step": 5321, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.7632090020924807e-05, + "timestamp": "2025-09-10 02:50:24.155546", + "step": 5322, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:24.210088", + "step": 5322, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014325756637845188, + "timestamp": "2025-09-10 02:50:24.212519", + "step": 5323, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:24.267232", + "step": 5323, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011387856677174568, + "timestamp": "2025-09-10 02:50:24.273833", + "step": 5324, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:24.326952", + "step": 5324, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007675164379179478, + "timestamp": "2025-09-10 02:50:24.329005", + "step": 5325, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:24.382665", + "step": 5325, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004846174269914627, + "timestamp": "2025-09-10 02:50:24.388952", + "step": 5326, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:50:24.447956", + "step": 5326, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011303840437904, + "timestamp": "2025-09-10 02:50:24.458390", + "step": 5327, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:24.512542", + "step": 5327, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008211983949877322, + "timestamp": "2025-09-10 02:50:24.522929", + "step": 5328, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:24.575952", + "step": 5328, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006613574223592877, + "timestamp": "2025-09-10 02:50:24.578140", + "step": 5329, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:50:24.638834", + "step": 5329, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007567502558231354, + "timestamp": "2025-09-10 02:50:24.649566", + "step": 5330, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:24.703403", + "step": 5330, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003144640475511551, + "timestamp": "2025-09-10 02:50:24.705722", + "step": 5331, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:24.759139", + "step": 5331, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.945012809301261e-05, + "timestamp": "2025-09-10 02:50:24.765408", + "step": 5332, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:24.819605", + "step": 5332, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016977153718471527, + "timestamp": "2025-09-10 02:50:24.823050", + "step": 5333, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:24.876603", + "step": 5333, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.153677218710072e-05, + "timestamp": "2025-09-10 02:50:24.883260", + "step": 5334, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:24.937150", + "step": 5334, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016523647354915738, + "timestamp": "2025-09-10 02:50:24.945241", + "step": 5335, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:24.998827", + "step": 5335, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.352885273168795e-05, + "timestamp": "2025-09-10 02:50:25.005504", + "step": 5336, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:25.058743", + "step": 5336, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002271160774398595, + "timestamp": "2025-09-10 02:50:25.060946", + "step": 5337, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:50:25.113485", + "step": 5337, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048105441965162754, + "timestamp": "2025-09-10 02:50:25.115878", + "step": 5338, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:25.172785", + "step": 5338, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05529012158513069, + "timestamp": "2025-09-10 02:50:25.175107", + "step": 5339, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:25.236075", + "step": 5339, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013532188313547522, + "timestamp": "2025-09-10 02:50:25.242109", + "step": 5340, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:25.297393", + "step": 5340, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.117966692429036e-05, + "timestamp": "2025-09-10 02:50:25.301050", + "step": 5341, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:25.354272", + "step": 5341, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017126834718510509, + "timestamp": "2025-09-10 02:50:25.360855", + "step": 5342, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:25.414290", + "step": 5342, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002101500576827675, + "timestamp": "2025-09-10 02:50:25.416539", + "step": 5343, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:25.469957", + "step": 5343, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005071446648798883, + "timestamp": "2025-09-10 02:50:25.480329", + "step": 5344, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:25.535297", + "step": 5344, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.599417272605933e-05, + "timestamp": "2025-09-10 02:50:25.545710", + "step": 5345, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:25.610795", + "step": 5345, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012619995686691254, + "timestamp": "2025-09-10 02:50:25.614474", + "step": 5346, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:25.672047", + "step": 5346, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019272010831627995, + "timestamp": "2025-09-10 02:50:25.674466", + "step": 5347, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:25.734136", + "step": 5347, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006375996861606836, + "timestamp": "2025-09-10 02:50:25.740213", + "step": 5348, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:25.792885", + "step": 5348, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03304572030901909, + "timestamp": "2025-09-10 02:50:25.795176", + "step": 5349, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:25.849671", + "step": 5349, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018502881284803152, + "timestamp": "2025-09-10 02:50:25.859279", + "step": 5350, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:25.915180", + "step": 5350, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003769530158024281, + "timestamp": "2025-09-10 02:50:25.917415", + "step": 5351, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:25.980407", + "step": 5351, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02025861106812954, + "timestamp": "2025-09-10 02:50:25.986598", + "step": 5352, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:26.039549", + "step": 5352, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017843242967501283, + "timestamp": "2025-09-10 02:50:26.044597", + "step": 5353, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:26.100910", + "step": 5353, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005629266379401088, + "timestamp": "2025-09-10 02:50:26.110696", + "step": 5354, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:26.165991", + "step": 5354, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008372010779567063, + "timestamp": "2025-09-10 02:50:26.175752", + "step": 5355, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:26.229762", + "step": 5355, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001248279819265008, + "timestamp": "2025-09-10 02:50:26.236052", + "step": 5356, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:26.301898", + "step": 5356, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006882089655846357, + "timestamp": "2025-09-10 02:50:26.304196", + "step": 5357, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:26.358507", + "step": 5357, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024386330042034388, + "timestamp": "2025-09-10 02:50:26.367998", + "step": 5358, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:26.423212", + "step": 5358, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008268129080533981, + "timestamp": "2025-09-10 02:50:26.427502", + "step": 5359, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:26.482615", + "step": 5359, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021984220948070288, + "timestamp": "2025-09-10 02:50:26.488739", + "step": 5360, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:26.542010", + "step": 5360, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.90206652507186e-05, + "timestamp": "2025-09-10 02:50:26.544139", + "step": 5361, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:26.598008", + "step": 5361, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016047522658482194, + "timestamp": "2025-09-10 02:50:26.600915", + "step": 5362, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:26.655044", + "step": 5362, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024346180725842714, + "timestamp": "2025-09-10 02:50:26.657452", + "step": 5363, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:50:26.729624", + "step": 5363, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031420416780747473, + "timestamp": "2025-09-10 02:50:26.742638", + "step": 5364, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:26.800140", + "step": 5364, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.489165727747604e-05, + "timestamp": "2025-09-10 02:50:26.802397", + "step": 5365, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:26.855891", + "step": 5365, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002581734152045101, + "timestamp": "2025-09-10 02:50:26.858872", + "step": 5366, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:50:26.920786", + "step": 5366, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016176769509911537, + "timestamp": "2025-09-10 02:50:26.931690", + "step": 5367, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:26.991576", + "step": 5367, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026264488697052002, + "timestamp": "2025-09-10 02:50:27.001202", + "step": 5368, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:27.055888", + "step": 5368, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048563111340627074, + "timestamp": "2025-09-10 02:50:27.061962", + "step": 5369, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:27.115774", + "step": 5369, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005964773008599877, + "timestamp": "2025-09-10 02:50:27.125380", + "step": 5370, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:27.180216", + "step": 5370, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005746359471231699, + "timestamp": "2025-09-10 02:50:27.190027", + "step": 5371, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:27.243450", + "step": 5371, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014409332070499659, + "timestamp": "2025-09-10 02:50:27.249395", + "step": 5372, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:27.302321", + "step": 5372, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011967864818871021, + "timestamp": "2025-09-10 02:50:27.304705", + "step": 5373, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:27.357519", + "step": 5373, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001208591042086482, + "timestamp": "2025-09-10 02:50:27.360711", + "step": 5374, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:27.414314", + "step": 5374, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003253155737183988, + "timestamp": "2025-09-10 02:50:27.416771", + "step": 5375, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:27.470978", + "step": 5375, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001208487548865378, + "timestamp": "2025-09-10 02:50:27.476994", + "step": 5376, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:27.529330", + "step": 5376, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.788415339542553e-05, + "timestamp": "2025-09-10 02:50:27.532362", + "step": 5377, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:27.586534", + "step": 5377, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01623762585222721, + "timestamp": "2025-09-10 02:50:27.596176", + "step": 5378, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:27.649286", + "step": 5378, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008726070635020733, + "timestamp": "2025-09-10 02:50:27.651366", + "step": 5379, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:27.705050", + "step": 5379, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001972564961761236, + "timestamp": "2025-09-10 02:50:27.711195", + "step": 5380, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:27.765181", + "step": 5380, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.038134124130010605, + "timestamp": "2025-09-10 02:50:27.772039", + "step": 5381, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:27.826305", + "step": 5381, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006542717455886304, + "timestamp": "2025-09-10 02:50:27.828527", + "step": 5382, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:27.881842", + "step": 5382, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002229032339528203, + "timestamp": "2025-09-10 02:50:27.883998", + "step": 5383, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:27.936618", + "step": 5383, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010771410539746284, + "timestamp": "2025-09-10 02:50:27.942558", + "step": 5384, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:27.995188", + "step": 5384, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005568374181166291, + "timestamp": "2025-09-10 02:50:27.997557", + "step": 5385, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:28.050713", + "step": 5385, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015441581490449607, + "timestamp": "2025-09-10 02:50:28.057300", + "step": 5386, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:28.110932", + "step": 5386, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003305087157059461, + "timestamp": "2025-09-10 02:50:28.113476", + "step": 5387, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:50:28.183079", + "step": 5387, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05367087200284004, + "timestamp": "2025-09-10 02:50:28.196542", + "step": 5388, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:28.251310", + "step": 5388, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030221480410546064, + "timestamp": "2025-09-10 02:50:28.255550", + "step": 5389, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:28.310438", + "step": 5389, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005331829888746142, + "timestamp": "2025-09-10 02:50:28.314137", + "step": 5390, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:28.370259", + "step": 5390, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002382699603913352, + "timestamp": "2025-09-10 02:50:28.372799", + "step": 5391, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:28.428084", + "step": 5391, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014002284035086632, + "timestamp": "2025-09-10 02:50:28.434525", + "step": 5392, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:28.487980", + "step": 5392, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010845428332686424, + "timestamp": "2025-09-10 02:50:28.490410", + "step": 5393, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:50:28.551609", + "step": 5393, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007954374304972589, + "timestamp": "2025-09-10 02:50:28.562317", + "step": 5394, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:28.616194", + "step": 5394, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015510314842686057, + "timestamp": "2025-09-10 02:50:28.618493", + "step": 5395, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:28.671212", + "step": 5395, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014971595373935997, + "timestamp": "2025-09-10 02:50:28.677483", + "step": 5396, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:28.730231", + "step": 5396, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0469910204410553, + "timestamp": "2025-09-10 02:50:28.733041", + "step": 5397, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:28.786597", + "step": 5397, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002077273325994611, + "timestamp": "2025-09-10 02:50:28.788914", + "step": 5398, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:50:28.855425", + "step": 5398, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.038137685507535934, + "timestamp": "2025-09-10 02:50:28.867655", + "step": 5399, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:28.921675", + "step": 5399, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0066796415485441685, + "timestamp": "2025-09-10 02:50:28.928130", + "step": 5400, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:28.981242", + "step": 5400, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020001684315502644, + "timestamp": "2025-09-10 02:50:28.983789", + "step": 5401, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:29.037351", + "step": 5401, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014558539260178804, + "timestamp": "2025-09-10 02:50:29.039932", + "step": 5402, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:29.093342", + "step": 5402, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030777842039242387, + "timestamp": "2025-09-10 02:50:29.095676", + "step": 5403, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:29.149367", + "step": 5403, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032638476113788784, + "timestamp": "2025-09-10 02:50:29.159768", + "step": 5404, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:29.212587", + "step": 5404, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007634016801603138, + "timestamp": "2025-09-10 02:50:29.217656", + "step": 5405, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:29.272916", + "step": 5405, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004233828221913427, + "timestamp": "2025-09-10 02:50:29.275273", + "step": 5406, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:29.328417", + "step": 5406, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009472401812672615, + "timestamp": "2025-09-10 02:50:29.330824", + "step": 5407, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:29.384166", + "step": 5407, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000296570680802688, + "timestamp": "2025-09-10 02:50:29.390358", + "step": 5408, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:29.443118", + "step": 5408, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006132566370069981, + "timestamp": "2025-09-10 02:50:29.445331", + "step": 5409, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:50:29.514110", + "step": 5409, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011745997704565525, + "timestamp": "2025-09-10 02:50:29.526786", + "step": 5410, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:29.580290", + "step": 5410, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013986477861180902, + "timestamp": "2025-09-10 02:50:29.588322", + "step": 5411, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:29.641902", + "step": 5411, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001512405724497512, + "timestamp": "2025-09-10 02:50:29.648052", + "step": 5412, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:29.700847", + "step": 5412, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004838789813220501, + "timestamp": "2025-09-10 02:50:29.703177", + "step": 5413, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:29.757936", + "step": 5413, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04057854786515236, + "timestamp": "2025-09-10 02:50:29.760175", + "step": 5414, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:29.815423", + "step": 5414, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000567415845580399, + "timestamp": "2025-09-10 02:50:29.825171", + "step": 5415, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:29.880104", + "step": 5415, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001277577830478549, + "timestamp": "2025-09-10 02:50:29.886853", + "step": 5416, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:29.941093", + "step": 5416, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011723192874342203, + "timestamp": "2025-09-10 02:50:29.948194", + "step": 5417, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:30.002953", + "step": 5417, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003885416779667139, + "timestamp": "2025-09-10 02:50:30.005244", + "step": 5418, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:30.060791", + "step": 5418, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004850219469517469, + "timestamp": "2025-09-10 02:50:30.070574", + "step": 5419, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:30.124242", + "step": 5419, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011279440484941006, + "timestamp": "2025-09-10 02:50:30.130552", + "step": 5420, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:30.183499", + "step": 5420, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006632082513533533, + "timestamp": "2025-09-10 02:50:30.193244", + "step": 5421, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:50:30.251349", + "step": 5421, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0063169486820697784, + "timestamp": "2025-09-10 02:50:30.261777", + "step": 5422, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:30.316319", + "step": 5422, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007491298019886017, + "timestamp": "2025-09-10 02:50:30.318794", + "step": 5423, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:30.372290", + "step": 5423, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004483237862586975, + "timestamp": "2025-09-10 02:50:30.378707", + "step": 5424, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:50:30.435470", + "step": 5424, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031863541807979345, + "timestamp": "2025-09-10 02:50:30.446658", + "step": 5425, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:50:30.500266", + "step": 5425, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005209577735513449, + "timestamp": "2025-09-10 02:50:30.502770", + "step": 5426, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:30.556912", + "step": 5426, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029170033521950245, + "timestamp": "2025-09-10 02:50:30.562624", + "step": 5427, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:30.616674", + "step": 5427, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009546552784740925, + "timestamp": "2025-09-10 02:50:30.623172", + "step": 5428, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:30.676822", + "step": 5428, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004046494606882334, + "timestamp": "2025-09-10 02:50:30.679182", + "step": 5429, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:30.736033", + "step": 5429, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021987634245306253, + "timestamp": "2025-09-10 02:50:30.740844", + "step": 5430, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:50:30.802367", + "step": 5430, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036865490255877376, + "timestamp": "2025-09-10 02:50:30.813044", + "step": 5431, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:30.868031", + "step": 5431, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032375380396842957, + "timestamp": "2025-09-10 02:50:30.875457", + "step": 5432, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:50:30.940582", + "step": 5432, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003072206163778901, + "timestamp": "2025-09-10 02:50:30.953791", + "step": 5433, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:31.008577", + "step": 5433, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000492691237013787, + "timestamp": "2025-09-10 02:50:31.016109", + "step": 5434, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:31.070751", + "step": 5434, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008733063004910946, + "timestamp": "2025-09-10 02:50:31.073067", + "step": 5435, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:31.127241", + "step": 5435, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006456434726715088, + "timestamp": "2025-09-10 02:50:31.135879", + "step": 5436, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:31.188625", + "step": 5436, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003922745119780302, + "timestamp": "2025-09-10 02:50:31.190940", + "step": 5437, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:31.244929", + "step": 5437, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002351520350202918, + "timestamp": "2025-09-10 02:50:31.246953", + "step": 5438, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:31.300179", + "step": 5438, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005290330736897886, + "timestamp": "2025-09-10 02:50:31.302558", + "step": 5439, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:50:48.245868", + "step": 5439, + "epoch": 3 + }, + { + "type": "pplx", + "content": 21247500.82676681, + "timestamp": "2025-09-10 02:50:48.249031", + "step": 5439, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:48.303235", + "step": 5439, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003753113327547908, + "timestamp": "2025-09-10 02:50:48.309781", + "step": 5440, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:48.363142", + "step": 5440, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000293015647912398, + "timestamp": "2025-09-10 02:50:48.365188", + "step": 5441, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:48.418445", + "step": 5441, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001151990843936801, + "timestamp": "2025-09-10 02:50:48.420593", + "step": 5442, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:50:48.488338", + "step": 5442, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008370094001293182, + "timestamp": "2025-09-10 02:50:48.500912", + "step": 5443, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:48.554963", + "step": 5443, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0033184869680553675, + "timestamp": "2025-09-10 02:50:48.560942", + "step": 5444, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:48.614925", + "step": 5444, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004803019401151687, + "timestamp": "2025-09-10 02:50:48.625399", + "step": 5445, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:48.678708", + "step": 5445, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025308416225016117, + "timestamp": "2025-09-10 02:50:48.681076", + "step": 5446, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:48.734997", + "step": 5446, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000822969654109329, + "timestamp": "2025-09-10 02:50:48.742904", + "step": 5447, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:48.796634", + "step": 5447, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024315486662089825, + "timestamp": "2025-09-10 02:50:48.802760", + "step": 5448, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:48.855246", + "step": 5448, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02165428176522255, + "timestamp": "2025-09-10 02:50:48.857213", + "step": 5449, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:50:48.915328", + "step": 5449, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010248234495520592, + "timestamp": "2025-09-10 02:50:48.925796", + "step": 5450, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:50:48.992881", + "step": 5450, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003237777855247259, + "timestamp": "2025-09-10 02:50:49.005136", + "step": 5451, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:49.058620", + "step": 5451, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008273234707303345, + "timestamp": "2025-09-10 02:50:49.064747", + "step": 5452, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:49.117470", + "step": 5452, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005221560131758451, + "timestamp": "2025-09-10 02:50:49.120397", + "step": 5453, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:50:49.173547", + "step": 5453, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015310164308175445, + "timestamp": "2025-09-10 02:50:49.176190", + "step": 5454, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:49.229888", + "step": 5454, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003992181736975908, + "timestamp": "2025-09-10 02:50:49.232237", + "step": 5455, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:49.285370", + "step": 5455, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021070835646241903, + "timestamp": "2025-09-10 02:50:49.291169", + "step": 5456, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:49.344663", + "step": 5456, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002683685743249953, + "timestamp": "2025-09-10 02:50:49.347080", + "step": 5457, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:49.401186", + "step": 5457, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005706814699806273, + "timestamp": "2025-09-10 02:50:49.403578", + "step": 5458, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:49.456501", + "step": 5458, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032965565333142877, + "timestamp": "2025-09-10 02:50:49.458545", + "step": 5459, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:49.513396", + "step": 5459, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001771595561876893, + "timestamp": "2025-09-10 02:50:49.524003", + "step": 5460, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:49.576234", + "step": 5460, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002002180553972721, + "timestamp": "2025-09-10 02:50:49.578170", + "step": 5461, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:49.631207", + "step": 5461, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.024736350402235985, + "timestamp": "2025-09-10 02:50:49.637744", + "step": 5462, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:49.690738", + "step": 5462, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006495007313787937, + "timestamp": "2025-09-10 02:50:49.692869", + "step": 5463, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:50:49.751037", + "step": 5463, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018918355635832995, + "timestamp": "2025-09-10 02:50:49.762263", + "step": 5464, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:49.815155", + "step": 5464, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016512200236320496, + "timestamp": "2025-09-10 02:50:49.817622", + "step": 5465, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:49.872124", + "step": 5465, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001879508257843554, + "timestamp": "2025-09-10 02:50:49.874286", + "step": 5466, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:49.927744", + "step": 5466, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005570161156356335, + "timestamp": "2025-09-10 02:50:49.935923", + "step": 5467, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:49.988952", + "step": 5467, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005711301346309483, + "timestamp": "2025-09-10 02:50:49.998293", + "step": 5468, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:50.050439", + "step": 5468, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002147696213796735, + "timestamp": "2025-09-10 02:50:50.053431", + "step": 5469, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:50.106365", + "step": 5469, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021026621107012033, + "timestamp": "2025-09-10 02:50:50.108659", + "step": 5470, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:50.161654", + "step": 5470, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018775284988805652, + "timestamp": "2025-09-10 02:50:50.164146", + "step": 5471, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:50:50.238591", + "step": 5471, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04646948724985123, + "timestamp": "2025-09-10 02:50:50.253049", + "step": 5472, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:50.306928", + "step": 5472, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019888910464942455, + "timestamp": "2025-09-10 02:50:50.317451", + "step": 5473, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:50.371566", + "step": 5473, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048026881995610893, + "timestamp": "2025-09-10 02:50:50.373889", + "step": 5474, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:50.426778", + "step": 5474, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014864002354443073, + "timestamp": "2025-09-10 02:50:50.428872", + "step": 5475, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:50.483235", + "step": 5475, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006738911033608019, + "timestamp": "2025-09-10 02:50:50.493824", + "step": 5476, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:50:50.554215", + "step": 5476, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004326379857957363, + "timestamp": "2025-09-10 02:50:50.566257", + "step": 5477, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:50.620155", + "step": 5477, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007383892661891878, + "timestamp": "2025-09-10 02:50:50.622521", + "step": 5478, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:50.676595", + "step": 5478, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009683158132247627, + "timestamp": "2025-09-10 02:50:50.679030", + "step": 5479, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:50.732190", + "step": 5479, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014861313393339515, + "timestamp": "2025-09-10 02:50:50.738135", + "step": 5480, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:50:50.803223", + "step": 5480, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002807900309562683, + "timestamp": "2025-09-10 02:50:50.816480", + "step": 5481, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:50.869477", + "step": 5481, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012591062113642693, + "timestamp": "2025-09-10 02:50:50.871604", + "step": 5482, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:50.924062", + "step": 5482, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022591904271394014, + "timestamp": "2025-09-10 02:50:50.926487", + "step": 5483, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:50.980231", + "step": 5483, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004140017263125628, + "timestamp": "2025-09-10 02:50:50.990644", + "step": 5484, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:51.043714", + "step": 5484, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001005968195386231, + "timestamp": "2025-09-10 02:50:51.046109", + "step": 5485, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:51.099415", + "step": 5485, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004888789844699204, + "timestamp": "2025-09-10 02:50:51.101997", + "step": 5486, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:51.155772", + "step": 5486, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001547732827020809, + "timestamp": "2025-09-10 02:50:51.165324", + "step": 5487, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:51.219491", + "step": 5487, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.355550719192252e-05, + "timestamp": "2025-09-10 02:50:51.225943", + "step": 5488, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:51.278847", + "step": 5488, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025125700631178916, + "timestamp": "2025-09-10 02:50:51.281162", + "step": 5489, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:51.334444", + "step": 5489, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023470858577638865, + "timestamp": "2025-09-10 02:50:51.336770", + "step": 5490, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:51.390117", + "step": 5490, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006363871973007917, + "timestamp": "2025-09-10 02:50:51.393182", + "step": 5491, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:51.447488", + "step": 5491, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032735734712332487, + "timestamp": "2025-09-10 02:50:51.456537", + "step": 5492, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:51.511171", + "step": 5492, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002292569959536195, + "timestamp": "2025-09-10 02:50:51.513329", + "step": 5493, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:51.567198", + "step": 5493, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014479009434580803, + "timestamp": "2025-09-10 02:50:51.569549", + "step": 5494, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:51.623138", + "step": 5494, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002753959270194173, + "timestamp": "2025-09-10 02:50:51.625757", + "step": 5495, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:50:51.690914", + "step": 5495, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003543792699929327, + "timestamp": "2025-09-10 02:50:51.702419", + "step": 5496, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:51.755201", + "step": 5496, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.240380313713104e-05, + "timestamp": "2025-09-10 02:50:51.757323", + "step": 5497, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:50:51.825216", + "step": 5497, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.237787991063669e-05, + "timestamp": "2025-09-10 02:50:51.837758", + "step": 5498, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:51.891465", + "step": 5498, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.766897710505873e-05, + "timestamp": "2025-09-10 02:50:51.901081", + "step": 5499, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:50:51.968010", + "step": 5499, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002392894821241498, + "timestamp": "2025-09-10 02:50:51.981046", + "step": 5500, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 5500", + "timestamp": "2025-09-10 02:50:52.559884", + "step": 5500, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:52.616981", + "step": 5500, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007339601870626211, + "timestamp": "2025-09-10 02:50:52.619043", + "step": 5501, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:50:52.679935", + "step": 5501, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016605450073257089, + "timestamp": "2025-09-10 02:50:52.690772", + "step": 5502, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:52.744575", + "step": 5502, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005885217688046396, + "timestamp": "2025-09-10 02:50:52.747206", + "step": 5503, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:52.800863", + "step": 5503, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009954218985512853, + "timestamp": "2025-09-10 02:50:52.806791", + "step": 5504, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:50:52.886207", + "step": 5504, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006120841135270894, + "timestamp": "2025-09-10 02:50:52.902746", + "step": 5505, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:52.957496", + "step": 5505, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008259991300292313, + "timestamp": "2025-09-10 02:50:52.959834", + "step": 5506, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:53.014056", + "step": 5506, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005346500314772129, + "timestamp": "2025-09-10 02:50:53.016444", + "step": 5507, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:53.070291", + "step": 5507, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002331335563212633, + "timestamp": "2025-09-10 02:50:53.076467", + "step": 5508, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:53.128700", + "step": 5508, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048764521488919854, + "timestamp": "2025-09-10 02:50:53.131279", + "step": 5509, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:50:53.189240", + "step": 5509, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007262100698426366, + "timestamp": "2025-09-10 02:50:53.199660", + "step": 5510, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:50:53.260692", + "step": 5510, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001912870560772717, + "timestamp": "2025-09-10 02:50:53.271331", + "step": 5511, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:53.327452", + "step": 5511, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015238898049574345, + "timestamp": "2025-09-10 02:50:53.334003", + "step": 5512, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:53.387770", + "step": 5512, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010233638749923557, + "timestamp": "2025-09-10 02:50:53.389897", + "step": 5513, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:53.443502", + "step": 5513, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008512347121722996, + "timestamp": "2025-09-10 02:50:53.445808", + "step": 5514, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:50:53.498636", + "step": 5514, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026342945056967437, + "timestamp": "2025-09-10 02:50:53.501073", + "step": 5515, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:53.555496", + "step": 5515, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027247951948083937, + "timestamp": "2025-09-10 02:50:53.566010", + "step": 5516, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:53.619010", + "step": 5516, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.927114812540822e-05, + "timestamp": "2025-09-10 02:50:53.621035", + "step": 5517, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:50:53.683017", + "step": 5517, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011841331142932177, + "timestamp": "2025-09-10 02:50:53.693749", + "step": 5518, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:53.748139", + "step": 5518, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004701241850852966, + "timestamp": "2025-09-10 02:50:53.750370", + "step": 5519, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:53.804539", + "step": 5519, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003399949346203357, + "timestamp": "2025-09-10 02:50:53.812880", + "step": 5520, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:53.867386", + "step": 5520, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.880938599351794e-05, + "timestamp": "2025-09-10 02:50:53.869567", + "step": 5521, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:53.924339", + "step": 5521, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.328742892947048e-05, + "timestamp": "2025-09-10 02:50:53.926792", + "step": 5522, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:53.979775", + "step": 5522, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.938553608255461e-05, + "timestamp": "2025-09-10 02:50:53.982100", + "step": 5523, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:54.035293", + "step": 5523, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015138849266804755, + "timestamp": "2025-09-10 02:50:54.041138", + "step": 5524, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:54.093972", + "step": 5524, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020000306540168822, + "timestamp": "2025-09-10 02:50:54.096498", + "step": 5525, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:50:54.157312", + "step": 5525, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018442697182763368, + "timestamp": "2025-09-10 02:50:54.168043", + "step": 5526, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:54.221881", + "step": 5526, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007379500311799347, + "timestamp": "2025-09-10 02:50:54.224265", + "step": 5527, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:54.277993", + "step": 5527, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.845731983659789e-05, + "timestamp": "2025-09-10 02:50:54.284071", + "step": 5528, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:54.336773", + "step": 5528, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001918896014103666, + "timestamp": "2025-09-10 02:50:54.343235", + "step": 5529, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:54.396334", + "step": 5529, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002484775031916797, + "timestamp": "2025-09-10 02:50:54.402905", + "step": 5530, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:50:54.467149", + "step": 5530, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002841740206349641, + "timestamp": "2025-09-10 02:50:54.477850", + "step": 5531, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:50:54.536160", + "step": 5531, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030048511689528823, + "timestamp": "2025-09-10 02:50:54.547366", + "step": 5532, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:54.602708", + "step": 5532, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01263501774519682, + "timestamp": "2025-09-10 02:50:54.608893", + "step": 5533, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:54.662262", + "step": 5533, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000945459702052176, + "timestamp": "2025-09-10 02:50:54.665870", + "step": 5534, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:54.719149", + "step": 5534, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015531423559878021, + "timestamp": "2025-09-10 02:50:54.721271", + "step": 5535, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:54.774373", + "step": 5535, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013516255654394627, + "timestamp": "2025-09-10 02:50:54.782980", + "step": 5536, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:50:54.836966", + "step": 5536, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001032380314427428, + "timestamp": "2025-09-10 02:50:54.838959", + "step": 5537, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:54.891814", + "step": 5537, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00046399797429330647, + "timestamp": "2025-09-10 02:50:54.893949", + "step": 5538, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:54.946779", + "step": 5538, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003333929053042084, + "timestamp": "2025-09-10 02:50:54.953380", + "step": 5539, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:55.006195", + "step": 5539, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011815697886049747, + "timestamp": "2025-09-10 02:50:55.012225", + "step": 5540, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:55.066239", + "step": 5540, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.934136712923646e-05, + "timestamp": "2025-09-10 02:50:55.068415", + "step": 5541, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:55.121428", + "step": 5541, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.425570220220834e-05, + "timestamp": "2025-09-10 02:50:55.123796", + "step": 5542, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:55.177782", + "step": 5542, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00948548037558794, + "timestamp": "2025-09-10 02:50:55.187429", + "step": 5543, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:55.242313", + "step": 5543, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003287082363385707, + "timestamp": "2025-09-10 02:50:55.248827", + "step": 5544, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:55.302270", + "step": 5544, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03937207907438278, + "timestamp": "2025-09-10 02:50:55.305111", + "step": 5545, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:50:55.360480", + "step": 5545, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023267955984920263, + "timestamp": "2025-09-10 02:50:55.370273", + "step": 5546, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:55.424337", + "step": 5546, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011005288688465953, + "timestamp": "2025-09-10 02:50:55.430311", + "step": 5547, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:55.485859", + "step": 5547, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03789230063557625, + "timestamp": "2025-09-10 02:50:55.491761", + "step": 5548, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:55.544789", + "step": 5548, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007854384020902216, + "timestamp": "2025-09-10 02:50:55.546910", + "step": 5549, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:50:55.600424", + "step": 5549, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.665223867865279e-05, + "timestamp": "2025-09-10 02:50:55.602513", + "step": 5550, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:55.656182", + "step": 5550, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001957099651917815, + "timestamp": "2025-09-10 02:50:55.665793", + "step": 5551, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:50:55.733816", + "step": 5551, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036005611764267087, + "timestamp": "2025-09-10 02:50:55.747262", + "step": 5552, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:55.800075", + "step": 5552, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.9516892027459107e-05, + "timestamp": "2025-09-10 02:50:55.802198", + "step": 5553, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:55.855236", + "step": 5553, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003146077273413539, + "timestamp": "2025-09-10 02:50:55.857474", + "step": 5554, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:50:55.931067", + "step": 5554, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006576284300535917, + "timestamp": "2025-09-10 02:50:55.944782", + "step": 5555, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:55.997950", + "step": 5555, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001338785223197192, + "timestamp": "2025-09-10 02:50:56.003689", + "step": 5556, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:56.056429", + "step": 5556, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001883771037682891, + "timestamp": "2025-09-10 02:50:56.058476", + "step": 5557, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:56.111591", + "step": 5557, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005519331898540258, + "timestamp": "2025-09-10 02:50:56.114595", + "step": 5558, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:50:56.183292", + "step": 5558, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.991384675027803e-05, + "timestamp": "2025-09-10 02:50:56.196048", + "step": 5559, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:50:56.257187", + "step": 5559, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.897096606437117e-05, + "timestamp": "2025-09-10 02:50:56.268709", + "step": 5560, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:50:56.328946", + "step": 5560, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011606734187807888, + "timestamp": "2025-09-10 02:50:56.340926", + "step": 5561, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:50:56.396302", + "step": 5561, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009803232969716191, + "timestamp": "2025-09-10 02:50:56.398346", + "step": 5562, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:56.452403", + "step": 5562, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026255848351866007, + "timestamp": "2025-09-10 02:50:56.454449", + "step": 5563, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:56.508528", + "step": 5563, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004570243472699076, + "timestamp": "2025-09-10 02:50:56.516514", + "step": 5564, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:56.577886", + "step": 5564, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.214964039623737e-05, + "timestamp": "2025-09-10 02:50:56.580293", + "step": 5565, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:56.637961", + "step": 5565, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032835971796885133, + "timestamp": "2025-09-10 02:50:56.644434", + "step": 5566, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:50:56.722700", + "step": 5566, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.543814328731969e-05, + "timestamp": "2025-09-10 02:50:56.736429", + "step": 5567, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:50:56.804505", + "step": 5567, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003689560398925096, + "timestamp": "2025-09-10 02:50:56.817845", + "step": 5568, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:56.871219", + "step": 5568, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021741418458987027, + "timestamp": "2025-09-10 02:50:56.873500", + "step": 5569, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:50:56.927113", + "step": 5569, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002533073420636356, + "timestamp": "2025-09-10 02:50:56.933540", + "step": 5570, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:50:56.987355", + "step": 5570, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006551762926392257, + "timestamp": "2025-09-10 02:50:56.996965", + "step": 5571, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:57.050204", + "step": 5571, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001837767194956541, + "timestamp": "2025-09-10 02:50:57.056298", + "step": 5572, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:57.109527", + "step": 5572, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.328331361757591e-05, + "timestamp": "2025-09-10 02:50:57.111612", + "step": 5573, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:50:57.174219", + "step": 5573, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003454013785813004, + "timestamp": "2025-09-10 02:50:57.185326", + "step": 5574, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:50:57.245285", + "step": 5574, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008125525782816112, + "timestamp": "2025-09-10 02:50:57.255727", + "step": 5575, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:57.310342", + "step": 5575, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.21983300941065e-05, + "timestamp": "2025-09-10 02:50:57.316376", + "step": 5576, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:57.369435", + "step": 5576, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0045641278848052025, + "timestamp": "2025-09-10 02:50:57.371364", + "step": 5577, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:57.424501", + "step": 5577, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022250918846111745, + "timestamp": "2025-09-10 02:50:57.426829", + "step": 5578, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:50:57.479765", + "step": 5578, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028944681980647147, + "timestamp": "2025-09-10 02:50:57.487803", + "step": 5579, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:57.541304", + "step": 5579, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016156604513525963, + "timestamp": "2025-09-10 02:50:57.547080", + "step": 5580, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:57.599502", + "step": 5580, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009447270072996616, + "timestamp": "2025-09-10 02:50:57.602387", + "step": 5581, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:50:57.655437", + "step": 5581, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012968778610229492, + "timestamp": "2025-09-10 02:50:57.657657", + "step": 5582, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:50:57.710990", + "step": 5582, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002128742344211787, + "timestamp": "2025-09-10 02:50:57.712829", + "step": 5583, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:50:57.765464", + "step": 5583, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008937938255257905, + "timestamp": "2025-09-10 02:50:57.771036", + "step": 5584, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:50:57.823641", + "step": 5584, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001554557355120778, + "timestamp": "2025-09-10 02:50:57.826579", + "step": 5585, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:50:57.879486", + "step": 5585, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005392265156842768, + "timestamp": "2025-09-10 02:50:57.881676", + "step": 5586, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:51:15.287716", + "step": 5586, + "epoch": 3 + }, + { + "type": "pplx", + "content": 21562904.896350686, + "timestamp": "2025-09-10 02:51:15.291034", + "step": 5586, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:15.345302", + "step": 5586, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002189187944168225, + "timestamp": "2025-09-10 02:51:15.347597", + "step": 5587, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:51:15.405770", + "step": 5587, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005762639455497265, + "timestamp": "2025-09-10 02:51:15.417010", + "step": 5588, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:15.471022", + "step": 5588, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034079679753631353, + "timestamp": "2025-09-10 02:51:15.473267", + "step": 5589, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:15.527110", + "step": 5589, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013979661162011325, + "timestamp": "2025-09-10 02:51:15.529145", + "step": 5590, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:15.582826", + "step": 5590, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018825005972757936, + "timestamp": "2025-09-10 02:51:15.584956", + "step": 5591, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:15.639262", + "step": 5591, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014448349829763174, + "timestamp": "2025-09-10 02:51:15.647050", + "step": 5592, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:15.701998", + "step": 5592, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.661448034923524e-05, + "timestamp": "2025-09-10 02:51:15.708441", + "step": 5593, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:15.765633", + "step": 5593, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022054556757211685, + "timestamp": "2025-09-10 02:51:15.767690", + "step": 5594, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:51:15.822329", + "step": 5594, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024557029246352613, + "timestamp": "2025-09-10 02:51:15.832187", + "step": 5595, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:15.886457", + "step": 5595, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0070946416817605495, + "timestamp": "2025-09-10 02:51:15.895548", + "step": 5596, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:15.949269", + "step": 5596, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019243491988163441, + "timestamp": "2025-09-10 02:51:15.954918", + "step": 5597, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:16.011547", + "step": 5597, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012800561671610922, + "timestamp": "2025-09-10 02:51:16.013954", + "step": 5598, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:16.067055", + "step": 5598, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013608066365122795, + "timestamp": "2025-09-10 02:51:16.073457", + "step": 5599, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:16.127115", + "step": 5599, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.2568775825202465e-05, + "timestamp": "2025-09-10 02:51:16.133121", + "step": 5600, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 784 + ], + "flops": 15680095254592.0 + }, + "timestamp": "2025-09-10 02:51:16.244735", + "step": 5600, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00547827547416091, + "timestamp": "2025-09-10 02:51:16.268995", + "step": 5601, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:51:16.324313", + "step": 5601, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016723232110962272, + "timestamp": "2025-09-10 02:51:16.334116", + "step": 5602, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:16.387570", + "step": 5602, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008668908849358559, + "timestamp": "2025-09-10 02:51:16.389681", + "step": 5603, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:16.442579", + "step": 5603, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.555887375725433e-05, + "timestamp": "2025-09-10 02:51:16.451409", + "step": 5604, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:16.503988", + "step": 5604, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004841067420784384, + "timestamp": "2025-09-10 02:51:16.510300", + "step": 5605, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:51:16.578976", + "step": 5605, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002817298227455467, + "timestamp": "2025-09-10 02:51:16.591664", + "step": 5606, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:51:16.644881", + "step": 5606, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006291944882832468, + "timestamp": "2025-09-10 02:51:16.647037", + "step": 5607, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:16.700166", + "step": 5607, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028438554727472365, + "timestamp": "2025-09-10 02:51:16.706223", + "step": 5608, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:16.758884", + "step": 5608, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002940757549367845, + "timestamp": "2025-09-10 02:51:16.761571", + "step": 5609, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:16.815242", + "step": 5609, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002263123169541359, + "timestamp": "2025-09-10 02:51:16.817591", + "step": 5610, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:16.870650", + "step": 5610, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012129464885219932, + "timestamp": "2025-09-10 02:51:16.872860", + "step": 5611, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:16.925643", + "step": 5611, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016381089517381042, + "timestamp": "2025-09-10 02:51:16.931724", + "step": 5612, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:16.984213", + "step": 5612, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024286587722599506, + "timestamp": "2025-09-10 02:51:16.987184", + "step": 5613, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:17.040259", + "step": 5613, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007025203667581081, + "timestamp": "2025-09-10 02:51:17.043790", + "step": 5614, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:17.097547", + "step": 5614, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.814678322873078e-05, + "timestamp": "2025-09-10 02:51:17.099793", + "step": 5615, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:17.152529", + "step": 5615, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010319777065888047, + "timestamp": "2025-09-10 02:51:17.158470", + "step": 5616, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:17.211270", + "step": 5616, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005186051712371409, + "timestamp": "2025-09-10 02:51:17.213595", + "step": 5617, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:51:17.280256", + "step": 5617, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016246965969912708, + "timestamp": "2025-09-10 02:51:17.292476", + "step": 5618, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:17.346352", + "step": 5618, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008623627363704145, + "timestamp": "2025-09-10 02:51:17.352708", + "step": 5619, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:17.405630", + "step": 5619, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019034624099731445, + "timestamp": "2025-09-10 02:51:17.411578", + "step": 5620, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 9920060287936.0 + }, + "timestamp": "2025-09-10 02:51:17.484073", + "step": 5620, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028661653050221503, + "timestamp": "2025-09-10 02:51:17.499310", + "step": 5621, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:51:17.559957", + "step": 5621, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014885196287650615, + "timestamp": "2025-09-10 02:51:17.570700", + "step": 5622, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:17.624566", + "step": 5622, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035353959538042545, + "timestamp": "2025-09-10 02:51:17.627043", + "step": 5623, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:17.680286", + "step": 5623, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007466920651495457, + "timestamp": "2025-09-10 02:51:17.686271", + "step": 5624, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:17.738735", + "step": 5624, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.3479107009479776e-05, + "timestamp": "2025-09-10 02:51:17.740976", + "step": 5625, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:17.793780", + "step": 5625, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002028747258009389, + "timestamp": "2025-09-10 02:51:17.796238", + "step": 5626, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:17.849926", + "step": 5626, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012403483560774475, + "timestamp": "2025-09-10 02:51:17.853349", + "step": 5627, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:17.909952", + "step": 5627, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001594047644175589, + "timestamp": "2025-09-10 02:51:17.915992", + "step": 5628, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:17.969204", + "step": 5628, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006895575788803399, + "timestamp": "2025-09-10 02:51:17.971292", + "step": 5629, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:18.024178", + "step": 5629, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022008655651006848, + "timestamp": "2025-09-10 02:51:18.026564", + "step": 5630, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:18.079842", + "step": 5630, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.427882130490616e-05, + "timestamp": "2025-09-10 02:51:18.082072", + "step": 5631, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:18.135130", + "step": 5631, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.123433119384572e-05, + "timestamp": "2025-09-10 02:51:18.141377", + "step": 5632, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:18.194232", + "step": 5632, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001261413999600336, + "timestamp": "2025-09-10 02:51:18.197035", + "step": 5633, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:18.250716", + "step": 5633, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015730482118669897, + "timestamp": "2025-09-10 02:51:18.252944", + "step": 5634, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:18.307541", + "step": 5634, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.122440001694486e-05, + "timestamp": "2025-09-10 02:51:18.309581", + "step": 5635, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:51:18.364021", + "step": 5635, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010261707211611792, + "timestamp": "2025-09-10 02:51:18.374607", + "step": 5636, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:18.427138", + "step": 5636, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017502308764960617, + "timestamp": "2025-09-10 02:51:18.435321", + "step": 5637, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:18.488714", + "step": 5637, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001294859393965453, + "timestamp": "2025-09-10 02:51:18.490887", + "step": 5638, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:18.543941", + "step": 5638, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035855176392942667, + "timestamp": "2025-09-10 02:51:18.550343", + "step": 5639, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:51:18.611969", + "step": 5639, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014155091776046902, + "timestamp": "2025-09-10 02:51:18.623585", + "step": 5640, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:18.676350", + "step": 5640, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005143466405570507, + "timestamp": "2025-09-10 02:51:18.678780", + "step": 5641, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:18.731355", + "step": 5641, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018331704195588827, + "timestamp": "2025-09-10 02:51:18.734351", + "step": 5642, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:18.787806", + "step": 5642, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.844376523280516e-05, + "timestamp": "2025-09-10 02:51:18.794283", + "step": 5643, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:18.847211", + "step": 5643, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004243445582687855, + "timestamp": "2025-09-10 02:51:18.853206", + "step": 5644, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:18.905796", + "step": 5644, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001895484165288508, + "timestamp": "2025-09-10 02:51:18.908663", + "step": 5645, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:18.961549", + "step": 5645, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00258989492431283, + "timestamp": "2025-09-10 02:51:18.969592", + "step": 5646, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:19.024801", + "step": 5646, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010115856275660917, + "timestamp": "2025-09-10 02:51:19.026984", + "step": 5647, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:51:19.080966", + "step": 5647, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011619441211223602, + "timestamp": "2025-09-10 02:51:19.091332", + "step": 5648, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:19.148581", + "step": 5648, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019926049571949989, + "timestamp": "2025-09-10 02:51:19.150934", + "step": 5649, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:19.203687", + "step": 5649, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004339973093010485, + "timestamp": "2025-09-10 02:51:19.206012", + "step": 5650, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:19.264518", + "step": 5650, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011778261978179216, + "timestamp": "2025-09-10 02:51:19.272668", + "step": 5651, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:19.325213", + "step": 5651, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022192489996086806, + "timestamp": "2025-09-10 02:51:19.331121", + "step": 5652, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:19.387059", + "step": 5652, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000944877858273685, + "timestamp": "2025-09-10 02:51:19.389247", + "step": 5653, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:19.442320", + "step": 5653, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040989063563756645, + "timestamp": "2025-09-10 02:51:19.444540", + "step": 5654, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:51:19.515338", + "step": 5654, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010838153684744611, + "timestamp": "2025-09-10 02:51:19.527923", + "step": 5655, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:19.583173", + "step": 5655, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010888419637922198, + "timestamp": "2025-09-10 02:51:19.589163", + "step": 5656, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:19.640982", + "step": 5656, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004886123933829367, + "timestamp": "2025-09-10 02:51:19.643992", + "step": 5657, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:19.697491", + "step": 5657, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011198705033166334, + "timestamp": "2025-09-10 02:51:19.699476", + "step": 5658, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:19.752281", + "step": 5658, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004554561339318752, + "timestamp": "2025-09-10 02:51:19.757833", + "step": 5659, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:19.812724", + "step": 5659, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009690019069239497, + "timestamp": "2025-09-10 02:51:19.818566", + "step": 5660, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:19.871400", + "step": 5660, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010577777720754966, + "timestamp": "2025-09-10 02:51:19.873709", + "step": 5661, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:19.926233", + "step": 5661, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001331623352598399, + "timestamp": "2025-09-10 02:51:19.928457", + "step": 5662, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:19.981125", + "step": 5662, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002772526117041707, + "timestamp": "2025-09-10 02:51:19.984086", + "step": 5663, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:20.036585", + "step": 5663, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.7308487410191447e-05, + "timestamp": "2025-09-10 02:51:20.042504", + "step": 5664, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:20.095374", + "step": 5664, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002683405764400959, + "timestamp": "2025-09-10 02:51:20.098250", + "step": 5665, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:20.151062", + "step": 5665, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014960307453293353, + "timestamp": "2025-09-10 02:51:20.159360", + "step": 5666, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:20.212715", + "step": 5666, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002826147014275193, + "timestamp": "2025-09-10 02:51:20.215022", + "step": 5667, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:20.267819", + "step": 5667, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011734537547454238, + "timestamp": "2025-09-10 02:51:20.275136", + "step": 5668, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:51:20.331313", + "step": 5668, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0076494039967656136, + "timestamp": "2025-09-10 02:51:20.342546", + "step": 5669, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:20.396084", + "step": 5669, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040603900561109185, + "timestamp": "2025-09-10 02:51:20.402490", + "step": 5670, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:20.456547", + "step": 5670, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013736828230321407, + "timestamp": "2025-09-10 02:51:20.458953", + "step": 5671, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:20.512548", + "step": 5671, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00391897838562727, + "timestamp": "2025-09-10 02:51:20.518520", + "step": 5672, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:51:20.571205", + "step": 5672, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002297357888892293, + "timestamp": "2025-09-10 02:51:20.573246", + "step": 5673, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:20.626809", + "step": 5673, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001491195143898949, + "timestamp": "2025-09-10 02:51:20.629149", + "step": 5674, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:20.683650", + "step": 5674, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038279959699139, + "timestamp": "2025-09-10 02:51:20.685961", + "step": 5675, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:51:20.747644", + "step": 5675, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.82403294579126e-05, + "timestamp": "2025-09-10 02:51:20.759530", + "step": 5676, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:51:20.812556", + "step": 5676, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.07603979110717773, + "timestamp": "2025-09-10 02:51:20.822880", + "step": 5677, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:51:20.884225", + "step": 5677, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014555695815943182, + "timestamp": "2025-09-10 02:51:20.895166", + "step": 5678, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:20.948219", + "step": 5678, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014404937624931335, + "timestamp": "2025-09-10 02:51:20.950395", + "step": 5679, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:21.003081", + "step": 5679, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005144585738889873, + "timestamp": "2025-09-10 02:51:21.008960", + "step": 5680, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:21.061290", + "step": 5680, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.894156386901159e-05, + "timestamp": "2025-09-10 02:51:21.063588", + "step": 5681, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:51:21.118217", + "step": 5681, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.2053729228209704e-05, + "timestamp": "2025-09-10 02:51:21.128017", + "step": 5682, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 576 + ], + "flops": 11520070000896.0 + }, + "timestamp": "2025-09-10 02:51:21.211670", + "step": 5682, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00046526288497261703, + "timestamp": "2025-09-10 02:51:21.227199", + "step": 5683, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:21.280385", + "step": 5683, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005217741127125919, + "timestamp": "2025-09-10 02:51:21.287750", + "step": 5684, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:21.341218", + "step": 5684, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001844012294895947, + "timestamp": "2025-09-10 02:51:21.343400", + "step": 5685, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:21.395989", + "step": 5685, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.369120769202709e-05, + "timestamp": "2025-09-10 02:51:21.398951", + "step": 5686, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:51:21.459287", + "step": 5686, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.50988838262856e-05, + "timestamp": "2025-09-10 02:51:21.470016", + "step": 5687, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:51:21.522770", + "step": 5687, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.87357533024624e-05, + "timestamp": "2025-09-10 02:51:21.528669", + "step": 5688, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:21.580885", + "step": 5688, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005220349412411451, + "timestamp": "2025-09-10 02:51:21.583009", + "step": 5689, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:21.635745", + "step": 5689, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001456657046219334, + "timestamp": "2025-09-10 02:51:21.638148", + "step": 5690, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:21.690952", + "step": 5690, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006287423893809319, + "timestamp": "2025-09-10 02:51:21.693497", + "step": 5691, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:51:21.750421", + "step": 5691, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001782461884431541, + "timestamp": "2025-09-10 02:51:21.756555", + "step": 5692, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:21.809175", + "step": 5692, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010749736247817054, + "timestamp": "2025-09-10 02:51:21.811234", + "step": 5693, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:21.864425", + "step": 5693, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.93696565576829e-05, + "timestamp": "2025-09-10 02:51:21.871023", + "step": 5694, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:21.924083", + "step": 5694, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.0167022234527394e-05, + "timestamp": "2025-09-10 02:51:21.926285", + "step": 5695, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:21.979643", + "step": 5695, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001911646395456046, + "timestamp": "2025-09-10 02:51:21.988658", + "step": 5696, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:22.041071", + "step": 5696, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010535532783251256, + "timestamp": "2025-09-10 02:51:22.043441", + "step": 5697, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:22.096443", + "step": 5697, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.176342943334021e-05, + "timestamp": "2025-09-10 02:51:22.098924", + "step": 5698, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:51:22.151447", + "step": 5698, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.99109324766323e-05, + "timestamp": "2025-09-10 02:51:22.154006", + "step": 5699, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:51:22.206533", + "step": 5699, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02412462793290615, + "timestamp": "2025-09-10 02:51:22.212476", + "step": 5700, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:22.264918", + "step": 5700, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009148859535343945, + "timestamp": "2025-09-10 02:51:22.267178", + "step": 5701, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:51:22.325933", + "step": 5701, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013624512357637286, + "timestamp": "2025-09-10 02:51:22.336382", + "step": 5702, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:22.389500", + "step": 5702, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.784575503435917e-05, + "timestamp": "2025-09-10 02:51:22.391735", + "step": 5703, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:51:22.445558", + "step": 5703, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.188021456589922e-05, + "timestamp": "2025-09-10 02:51:22.455967", + "step": 5704, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:22.509452", + "step": 5704, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016375510022044182, + "timestamp": "2025-09-10 02:51:22.511636", + "step": 5705, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:22.564630", + "step": 5705, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.4782795157516375e-05, + "timestamp": "2025-09-10 02:51:22.566879", + "step": 5706, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:22.619543", + "step": 5706, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040555463056080043, + "timestamp": "2025-09-10 02:51:22.621751", + "step": 5707, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:22.674404", + "step": 5707, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0751379132270813, + "timestamp": "2025-09-10 02:51:22.679989", + "step": 5708, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:22.732222", + "step": 5708, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.4346985810552724e-05, + "timestamp": "2025-09-10 02:51:22.735254", + "step": 5709, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:22.787882", + "step": 5709, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011057908704970032, + "timestamp": "2025-09-10 02:51:22.790925", + "step": 5710, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:51:22.844495", + "step": 5710, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.72880157455802e-05, + "timestamp": "2025-09-10 02:51:22.854142", + "step": 5711, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:22.907062", + "step": 5711, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.590354602551088e-05, + "timestamp": "2025-09-10 02:51:22.912790", + "step": 5712, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:22.966159", + "step": 5712, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.515549027128145e-05, + "timestamp": "2025-09-10 02:51:22.968599", + "step": 5713, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:51:23.023147", + "step": 5713, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010020162881119177, + "timestamp": "2025-09-10 02:51:23.032937", + "step": 5714, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:51:23.085893", + "step": 5714, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007063016528263688, + "timestamp": "2025-09-10 02:51:23.088229", + "step": 5715, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:51:23.142211", + "step": 5715, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003517144825309515, + "timestamp": "2025-09-10 02:51:23.152621", + "step": 5716, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:23.205394", + "step": 5716, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05250024423003197, + "timestamp": "2025-09-10 02:51:23.207683", + "step": 5717, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:23.260699", + "step": 5717, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010178987868130207, + "timestamp": "2025-09-10 02:51:23.267267", + "step": 5718, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:23.320642", + "step": 5718, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.096639066934586e-05, + "timestamp": "2025-09-10 02:51:23.327045", + "step": 5719, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:23.380055", + "step": 5719, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007258725236169994, + "timestamp": "2025-09-10 02:51:23.385640", + "step": 5720, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:23.437504", + "step": 5720, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.05341187515296e-05, + "timestamp": "2025-09-10 02:51:23.439634", + "step": 5721, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:23.492589", + "step": 5721, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031913447310216725, + "timestamp": "2025-09-10 02:51:23.494767", + "step": 5722, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:23.548585", + "step": 5722, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004898604936897755, + "timestamp": "2025-09-10 02:51:23.550777", + "step": 5723, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:23.603890", + "step": 5723, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018130905227735639, + "timestamp": "2025-09-10 02:51:23.611716", + "step": 5724, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:23.663944", + "step": 5724, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022090536367613822, + "timestamp": "2025-09-10 02:51:23.670662", + "step": 5725, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:23.723821", + "step": 5725, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000243786969804205, + "timestamp": "2025-09-10 02:51:23.725975", + "step": 5726, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:23.778599", + "step": 5726, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016002205666154623, + "timestamp": "2025-09-10 02:51:23.780910", + "step": 5727, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:23.833318", + "step": 5727, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002175274130422622, + "timestamp": "2025-09-10 02:51:23.838884", + "step": 5728, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:23.891221", + "step": 5728, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.111921099247411e-05, + "timestamp": "2025-09-10 02:51:23.899688", + "step": 5729, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:23.952124", + "step": 5729, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0296839140355587, + "timestamp": "2025-09-10 02:51:23.955288", + "step": 5730, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:24.009295", + "step": 5730, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001510259578935802, + "timestamp": "2025-09-10 02:51:24.011600", + "step": 5731, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:24.064918", + "step": 5731, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001774175325408578, + "timestamp": "2025-09-10 02:51:24.073732", + "step": 5732, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:51:24.140523", + "step": 5732, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013746106065809727, + "timestamp": "2025-09-10 02:51:24.154142", + "step": 5733, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:51:41.046821", + "step": 5733, + "epoch": 3 + }, + { + "type": "pplx", + "content": 20482719.02242213, + "timestamp": "2025-09-10 02:51:41.049810", + "step": 5733, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:51:41.103976", + "step": 5733, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010244191624224186, + "timestamp": "2025-09-10 02:51:41.106004", + "step": 5734, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:51:41.159201", + "step": 5734, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009839391568675637, + "timestamp": "2025-09-10 02:51:41.161433", + "step": 5735, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:41.214910", + "step": 5735, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0067941914312541485, + "timestamp": "2025-09-10 02:51:41.220980", + "step": 5736, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:51:41.274429", + "step": 5736, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013374501140788198, + "timestamp": "2025-09-10 02:51:41.284929", + "step": 5737, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:51:41.346637", + "step": 5737, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01822042651474476, + "timestamp": "2025-09-10 02:51:41.357665", + "step": 5738, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:51:41.425500", + "step": 5738, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.684946179622784e-05, + "timestamp": "2025-09-10 02:51:41.438064", + "step": 5739, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:51:41.496007", + "step": 5739, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015893857926130295, + "timestamp": "2025-09-10 02:51:41.507250", + "step": 5740, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:51:41.559909", + "step": 5740, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013877787569072098, + "timestamp": "2025-09-10 02:51:41.569894", + "step": 5741, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:51:41.631171", + "step": 5741, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012965899659320712, + "timestamp": "2025-09-10 02:51:41.642291", + "step": 5742, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:41.695033", + "step": 5742, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033993370016105473, + "timestamp": "2025-09-10 02:51:41.703273", + "step": 5743, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:41.756556", + "step": 5743, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015266095579136163, + "timestamp": "2025-09-10 02:51:41.763776", + "step": 5744, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:41.815884", + "step": 5744, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008721463964320719, + "timestamp": "2025-09-10 02:51:41.817790", + "step": 5745, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:51:41.870584", + "step": 5745, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016237528761848807, + "timestamp": "2025-09-10 02:51:41.872772", + "step": 5746, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:41.925005", + "step": 5746, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007079765782691538, + "timestamp": "2025-09-10 02:51:41.927049", + "step": 5747, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:51:41.979407", + "step": 5747, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002578176325187087, + "timestamp": "2025-09-10 02:51:41.985096", + "step": 5748, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:42.037779", + "step": 5748, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005948370671831071, + "timestamp": "2025-09-10 02:51:42.039973", + "step": 5749, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:42.092453", + "step": 5749, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009538509184494615, + "timestamp": "2025-09-10 02:51:42.094560", + "step": 5750, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:51:42.149335", + "step": 5750, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001113768434152007, + "timestamp": "2025-09-10 02:51:42.159146", + "step": 5751, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:42.211904", + "step": 5751, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.385928357602097e-05, + "timestamp": "2025-09-10 02:51:42.217605", + "step": 5752, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:42.269610", + "step": 5752, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002059690887108445, + "timestamp": "2025-09-10 02:51:42.271561", + "step": 5753, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:51:42.325161", + "step": 5753, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014517420902848244, + "timestamp": "2025-09-10 02:51:42.334556", + "step": 5754, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:51:42.395940", + "step": 5754, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003307890146970749, + "timestamp": "2025-09-10 02:51:42.407049", + "step": 5755, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:42.460336", + "step": 5755, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009655895410105586, + "timestamp": "2025-09-10 02:51:42.467548", + "step": 5756, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:42.519814", + "step": 5756, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009693386382423341, + "timestamp": "2025-09-10 02:51:42.521901", + "step": 5757, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:42.574571", + "step": 5757, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013732363004237413, + "timestamp": "2025-09-10 02:51:42.576743", + "step": 5758, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:51:42.629751", + "step": 5758, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044860862544737756, + "timestamp": "2025-09-10 02:51:42.639308", + "step": 5759, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:51:42.707895", + "step": 5759, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012755978386849165, + "timestamp": "2025-09-10 02:51:42.721355", + "step": 5760, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:42.773804", + "step": 5760, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041840082849375904, + "timestamp": "2025-09-10 02:51:42.775872", + "step": 5761, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:42.828350", + "step": 5761, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05786401778459549, + "timestamp": "2025-09-10 02:51:42.830559", + "step": 5762, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:51:42.883887", + "step": 5762, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017297634622082114, + "timestamp": "2025-09-10 02:51:42.893469", + "step": 5763, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:42.947075", + "step": 5763, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013945087557658553, + "timestamp": "2025-09-10 02:51:42.952714", + "step": 5764, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:43.004534", + "step": 5764, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003477123682387173, + "timestamp": "2025-09-10 02:51:43.011146", + "step": 5765, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:51:43.069471", + "step": 5765, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005236553261056542, + "timestamp": "2025-09-10 02:51:43.079899", + "step": 5766, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:43.133118", + "step": 5766, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005685584037564695, + "timestamp": "2025-09-10 02:51:43.135571", + "step": 5767, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:51:43.193569", + "step": 5767, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002259068423882127, + "timestamp": "2025-09-10 02:51:43.204770", + "step": 5768, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:43.257614", + "step": 5768, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032246175687760115, + "timestamp": "2025-09-10 02:51:43.264146", + "step": 5769, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:43.316972", + "step": 5769, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004127685911953449, + "timestamp": "2025-09-10 02:51:43.319926", + "step": 5770, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:43.372386", + "step": 5770, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001799857011064887, + "timestamp": "2025-09-10 02:51:43.375441", + "step": 5771, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:43.428120", + "step": 5771, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010488808620721102, + "timestamp": "2025-09-10 02:51:43.433913", + "step": 5772, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:43.485775", + "step": 5772, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012435141252353787, + "timestamp": "2025-09-10 02:51:43.488814", + "step": 5773, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:43.541380", + "step": 5773, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005501917912624776, + "timestamp": "2025-09-10 02:51:43.544522", + "step": 5774, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:43.596879", + "step": 5774, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003421418077778071, + "timestamp": "2025-09-10 02:51:43.599106", + "step": 5775, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:51:43.659216", + "step": 5775, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022140939254313707, + "timestamp": "2025-09-10 02:51:43.670706", + "step": 5776, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:43.722932", + "step": 5776, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009579942561686039, + "timestamp": "2025-09-10 02:51:43.729525", + "step": 5777, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:51:43.802715", + "step": 5777, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020193222735542804, + "timestamp": "2025-09-10 02:51:43.816434", + "step": 5778, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:43.869534", + "step": 5778, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.025681912899017334, + "timestamp": "2025-09-10 02:51:43.871568", + "step": 5779, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:43.924417", + "step": 5779, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001249152235686779, + "timestamp": "2025-09-10 02:51:43.930121", + "step": 5780, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:43.982074", + "step": 5780, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004935614764690399, + "timestamp": "2025-09-10 02:51:43.984271", + "step": 5781, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:44.037775", + "step": 5781, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002590807562228292, + "timestamp": "2025-09-10 02:51:44.044372", + "step": 5782, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:44.097589", + "step": 5782, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007769867079332471, + "timestamp": "2025-09-10 02:51:44.100096", + "step": 5783, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:44.152771", + "step": 5783, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003105894138570875, + "timestamp": "2025-09-10 02:51:44.158475", + "step": 5784, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:44.210459", + "step": 5784, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.044892568141222, + "timestamp": "2025-09-10 02:51:44.217239", + "step": 5785, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:44.269678", + "step": 5785, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003026153426617384, + "timestamp": "2025-09-10 02:51:44.272761", + "step": 5786, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:44.324778", + "step": 5786, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016270544147118926, + "timestamp": "2025-09-10 02:51:44.327957", + "step": 5787, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:44.380295", + "step": 5787, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009035581722855568, + "timestamp": "2025-09-10 02:51:44.385855", + "step": 5788, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:44.437620", + "step": 5788, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004763353615999222, + "timestamp": "2025-09-10 02:51:44.439661", + "step": 5789, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:44.492022", + "step": 5789, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015156091831158847, + "timestamp": "2025-09-10 02:51:44.494897", + "step": 5790, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:51:44.551052", + "step": 5790, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022842036560177803, + "timestamp": "2025-09-10 02:51:44.553188", + "step": 5791, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:44.605894", + "step": 5791, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035799542092718184, + "timestamp": "2025-09-10 02:51:44.611459", + "step": 5792, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:44.663067", + "step": 5792, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005225968197919428, + "timestamp": "2025-09-10 02:51:44.666051", + "step": 5793, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:44.718390", + "step": 5793, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023316919396165758, + "timestamp": "2025-09-10 02:51:44.721696", + "step": 5794, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:44.773877", + "step": 5794, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001692460646154359, + "timestamp": "2025-09-10 02:51:44.775993", + "step": 5795, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:51:44.836577", + "step": 5795, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017555321392137557, + "timestamp": "2025-09-10 02:51:44.848291", + "step": 5796, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:44.900769", + "step": 5796, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014712891425006092, + "timestamp": "2025-09-10 02:51:44.903779", + "step": 5797, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:44.956480", + "step": 5797, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004672048962675035, + "timestamp": "2025-09-10 02:51:44.958793", + "step": 5798, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:45.011724", + "step": 5798, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013020833721384406, + "timestamp": "2025-09-10 02:51:45.019950", + "step": 5799, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:45.072629", + "step": 5799, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015009155322331935, + "timestamp": "2025-09-10 02:51:45.078530", + "step": 5800, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:45.130705", + "step": 5800, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018723929533734918, + "timestamp": "2025-09-10 02:51:45.138976", + "step": 5801, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:45.192391", + "step": 5801, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036644097417593002, + "timestamp": "2025-09-10 02:51:45.199130", + "step": 5802, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:45.252770", + "step": 5802, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022594413894694299, + "timestamp": "2025-09-10 02:51:45.255057", + "step": 5803, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:45.307878", + "step": 5803, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008001906680874527, + "timestamp": "2025-09-10 02:51:45.316929", + "step": 5804, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:45.369507", + "step": 5804, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003338546375744045, + "timestamp": "2025-09-10 02:51:45.371692", + "step": 5805, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:45.424449", + "step": 5805, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000219851266592741, + "timestamp": "2025-09-10 02:51:45.432846", + "step": 5806, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:51:45.486628", + "step": 5806, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014241269091144204, + "timestamp": "2025-09-10 02:51:45.496215", + "step": 5807, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:45.549146", + "step": 5807, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002949876943603158, + "timestamp": "2025-09-10 02:51:45.555014", + "step": 5808, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:45.607198", + "step": 5808, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029255712870508432, + "timestamp": "2025-09-10 02:51:45.613961", + "step": 5809, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:45.667230", + "step": 5809, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041943046380765736, + "timestamp": "2025-09-10 02:51:45.669664", + "step": 5810, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:45.722893", + "step": 5810, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004880022897850722, + "timestamp": "2025-09-10 02:51:45.725160", + "step": 5811, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:51:45.779473", + "step": 5811, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005901859840378165, + "timestamp": "2025-09-10 02:51:45.790075", + "step": 5812, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:51:45.856681", + "step": 5812, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024833696079440415, + "timestamp": "2025-09-10 02:51:45.869943", + "step": 5813, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:45.923333", + "step": 5813, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002008928422583267, + "timestamp": "2025-09-10 02:51:45.925339", + "step": 5814, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:45.978660", + "step": 5814, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006987524102441967, + "timestamp": "2025-09-10 02:51:45.981529", + "step": 5815, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:46.034014", + "step": 5815, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0054719215258955956, + "timestamp": "2025-09-10 02:51:46.039949", + "step": 5816, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:51:46.096034", + "step": 5816, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001059025409631431, + "timestamp": "2025-09-10 02:51:46.107300", + "step": 5817, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:46.160328", + "step": 5817, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013769164215773344, + "timestamp": "2025-09-10 02:51:46.166903", + "step": 5818, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:46.220236", + "step": 5818, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005804088432341814, + "timestamp": "2025-09-10 02:51:46.222536", + "step": 5819, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:51:46.275048", + "step": 5819, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022570193686988205, + "timestamp": "2025-09-10 02:51:46.281015", + "step": 5820, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:51:46.339754", + "step": 5820, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003626677207648754, + "timestamp": "2025-09-10 02:51:46.351214", + "step": 5821, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:51:46.405330", + "step": 5821, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018641487986315042, + "timestamp": "2025-09-10 02:51:46.414379", + "step": 5822, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:46.468736", + "step": 5822, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007334387628361583, + "timestamp": "2025-09-10 02:51:46.471170", + "step": 5823, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:46.525196", + "step": 5823, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023691621026955545, + "timestamp": "2025-09-10 02:51:46.531365", + "step": 5824, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:46.584010", + "step": 5824, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002022029075305909, + "timestamp": "2025-09-10 02:51:46.586148", + "step": 5825, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:46.639045", + "step": 5825, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.030149130150675774, + "timestamp": "2025-09-10 02:51:46.641222", + "step": 5826, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:46.693774", + "step": 5826, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018455949611961842, + "timestamp": "2025-09-10 02:51:46.695778", + "step": 5827, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:46.748531", + "step": 5827, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003206153050996363, + "timestamp": "2025-09-10 02:51:46.754360", + "step": 5828, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:46.806908", + "step": 5828, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010543627431616187, + "timestamp": "2025-09-10 02:51:46.809167", + "step": 5829, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:46.862050", + "step": 5829, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002578561834525317, + "timestamp": "2025-09-10 02:51:46.865017", + "step": 5830, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:46.920190", + "step": 5830, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006444288301281631, + "timestamp": "2025-09-10 02:51:46.922352", + "step": 5831, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:46.975669", + "step": 5831, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003105739306192845, + "timestamp": "2025-09-10 02:51:46.981620", + "step": 5832, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:47.033950", + "step": 5832, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003395001171156764, + "timestamp": "2025-09-10 02:51:47.035946", + "step": 5833, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:47.089245", + "step": 5833, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013299237471073866, + "timestamp": "2025-09-10 02:51:47.092087", + "step": 5834, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:47.146140", + "step": 5834, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017916594515554607, + "timestamp": "2025-09-10 02:51:47.148319", + "step": 5835, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:47.201356", + "step": 5835, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008848682045936584, + "timestamp": "2025-09-10 02:51:47.207280", + "step": 5836, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:47.259684", + "step": 5836, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012626085663214326, + "timestamp": "2025-09-10 02:51:47.262495", + "step": 5837, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:47.314931", + "step": 5837, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020191511430311948, + "timestamp": "2025-09-10 02:51:47.317975", + "step": 5838, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:51:47.375622", + "step": 5838, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024575458373874426, + "timestamp": "2025-09-10 02:51:47.386055", + "step": 5839, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:47.439173", + "step": 5839, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005852826288901269, + "timestamp": "2025-09-10 02:51:47.445096", + "step": 5840, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:47.496952", + "step": 5840, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016624733107164502, + "timestamp": "2025-09-10 02:51:47.499088", + "step": 5841, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:47.552019", + "step": 5841, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002822458336595446, + "timestamp": "2025-09-10 02:51:47.554121", + "step": 5842, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:51:47.613034", + "step": 5842, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001824253995437175, + "timestamp": "2025-09-10 02:51:47.623349", + "step": 5843, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:47.677365", + "step": 5843, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014550441119354218, + "timestamp": "2025-09-10 02:51:47.683325", + "step": 5844, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:47.735165", + "step": 5844, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003948107361793518, + "timestamp": "2025-09-10 02:51:47.741668", + "step": 5845, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:47.794560", + "step": 5845, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008949020993895829, + "timestamp": "2025-09-10 02:51:47.796791", + "step": 5846, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:47.849535", + "step": 5846, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002585839480161667, + "timestamp": "2025-09-10 02:51:47.851836", + "step": 5847, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:47.905040", + "step": 5847, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041013467125594616, + "timestamp": "2025-09-10 02:51:47.910692", + "step": 5848, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:47.962702", + "step": 5848, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01508873701095581, + "timestamp": "2025-09-10 02:51:47.964942", + "step": 5849, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:48.018000", + "step": 5849, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001222583232447505, + "timestamp": "2025-09-10 02:51:48.019983", + "step": 5850, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:51:48.086194", + "step": 5850, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003196625621058047, + "timestamp": "2025-09-10 02:51:48.098443", + "step": 5851, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:48.151415", + "step": 5851, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007270271889865398, + "timestamp": "2025-09-10 02:51:48.157035", + "step": 5852, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:51:48.215674", + "step": 5852, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029898055363446474, + "timestamp": "2025-09-10 02:51:48.227258", + "step": 5853, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:51:48.279675", + "step": 5853, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.705479947617278e-05, + "timestamp": "2025-09-10 02:51:48.281986", + "step": 5854, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:48.334896", + "step": 5854, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006672864314168692, + "timestamp": "2025-09-10 02:51:48.337836", + "step": 5855, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:51:48.390988", + "step": 5855, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005213103140704334, + "timestamp": "2025-09-10 02:51:48.399753", + "step": 5856, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:48.453427", + "step": 5856, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009325035498477519, + "timestamp": "2025-09-10 02:51:48.455548", + "step": 5857, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:48.508801", + "step": 5857, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005977279506623745, + "timestamp": "2025-09-10 02:51:48.511361", + "step": 5858, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:48.564658", + "step": 5858, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.17429034516681e-05, + "timestamp": "2025-09-10 02:51:48.566855", + "step": 5859, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:51:48.619593", + "step": 5859, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032618644763715565, + "timestamp": "2025-09-10 02:51:48.625760", + "step": 5860, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:48.678549", + "step": 5860, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.650614083511755e-05, + "timestamp": "2025-09-10 02:51:48.680539", + "step": 5861, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:48.733581", + "step": 5861, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003374809166416526, + "timestamp": "2025-09-10 02:51:48.735738", + "step": 5862, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:48.788804", + "step": 5862, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004158054944127798, + "timestamp": "2025-09-10 02:51:48.791668", + "step": 5863, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:48.845069", + "step": 5863, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018565886421129107, + "timestamp": "2025-09-10 02:51:48.850818", + "step": 5864, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:48.903807", + "step": 5864, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003595920279622078, + "timestamp": "2025-09-10 02:51:48.906068", + "step": 5865, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:51:48.959243", + "step": 5865, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.024408187717199326, + "timestamp": "2025-09-10 02:51:48.968864", + "step": 5866, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:49.022018", + "step": 5866, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012649646669160575, + "timestamp": "2025-09-10 02:51:49.024374", + "step": 5867, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:49.077918", + "step": 5867, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.017767686396837e-05, + "timestamp": "2025-09-10 02:51:49.084051", + "step": 5868, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:49.136551", + "step": 5868, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006468692445196211, + "timestamp": "2025-09-10 02:51:49.142987", + "step": 5869, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:49.195713", + "step": 5869, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004957037162967026, + "timestamp": "2025-09-10 02:51:49.198630", + "step": 5870, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:49.251648", + "step": 5870, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030178995802998543, + "timestamp": "2025-09-10 02:51:49.253868", + "step": 5871, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:51:49.306143", + "step": 5871, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027214823057875037, + "timestamp": "2025-09-10 02:51:49.311746", + "step": 5872, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:49.364033", + "step": 5872, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001998594496399164, + "timestamp": "2025-09-10 02:51:49.365982", + "step": 5873, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:51:49.418353", + "step": 5873, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003163004294037819, + "timestamp": "2025-09-10 02:51:49.420458", + "step": 5874, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:51:49.473440", + "step": 5874, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031887299846857786, + "timestamp": "2025-09-10 02:51:49.480138", + "step": 5875, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:49.532629", + "step": 5875, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019040309125557542, + "timestamp": "2025-09-10 02:51:49.538223", + "step": 5876, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:51:49.589777", + "step": 5876, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017279988387599587, + "timestamp": "2025-09-10 02:51:49.592813", + "step": 5877, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:51:49.646174", + "step": 5877, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021360639948397875, + "timestamp": "2025-09-10 02:51:49.648274", + "step": 5878, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:51:49.701324", + "step": 5878, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016497794422321022, + "timestamp": "2025-09-10 02:51:49.703391", + "step": 5879, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:51:49.756052", + "step": 5879, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001751897216308862, + "timestamp": "2025-09-10 02:51:49.761695", + "step": 5880, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:52:06.495393", + "step": 5880, + "epoch": 3 + }, + { + "type": "pplx", + "content": 22582352.436606582, + "timestamp": "2025-09-10 02:52:06.498475", + "step": 5880, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:06.553499", + "step": 5880, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008500745752826333, + "timestamp": "2025-09-10 02:52:06.555694", + "step": 5881, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:06.609318", + "step": 5881, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00237825489602983, + "timestamp": "2025-09-10 02:52:06.615373", + "step": 5882, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:52:06.683587", + "step": 5882, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004402727645356208, + "timestamp": "2025-09-10 02:52:06.696184", + "step": 5883, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:06.750171", + "step": 5883, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014934144564904273, + "timestamp": "2025-09-10 02:52:06.757019", + "step": 5884, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:06.814059", + "step": 5884, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.489151175832376e-05, + "timestamp": "2025-09-10 02:52:06.816864", + "step": 5885, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:06.879648", + "step": 5885, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014372929581440985, + "timestamp": "2025-09-10 02:52:06.889378", + "step": 5886, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:06.951898", + "step": 5886, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036258361069485545, + "timestamp": "2025-09-10 02:52:06.959600", + "step": 5887, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:07.017794", + "step": 5887, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001417129678884521, + "timestamp": "2025-09-10 02:52:07.024036", + "step": 5888, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:52:07.092241", + "step": 5888, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01438497006893158, + "timestamp": "2025-09-10 02:52:07.106040", + "step": 5889, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:07.161488", + "step": 5889, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014281366020441055, + "timestamp": "2025-09-10 02:52:07.169058", + "step": 5890, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:07.225419", + "step": 5890, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002134530106559396, + "timestamp": "2025-09-10 02:52:07.235247", + "step": 5891, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:07.292899", + "step": 5891, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002158203424187377, + "timestamp": "2025-09-10 02:52:07.298907", + "step": 5892, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:07.352829", + "step": 5892, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.779220711905509e-05, + "timestamp": "2025-09-10 02:52:07.355341", + "step": 5893, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:52:07.408285", + "step": 5893, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001536442432552576, + "timestamp": "2025-09-10 02:52:07.411415", + "step": 5894, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:07.469134", + "step": 5894, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.067933231359348e-05, + "timestamp": "2025-09-10 02:52:07.472237", + "step": 5895, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:52:07.555434", + "step": 5895, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.683582902653143e-05, + "timestamp": "2025-09-10 02:52:07.570327", + "step": 5896, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:07.627918", + "step": 5896, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027145136846229434, + "timestamp": "2025-09-10 02:52:07.636046", + "step": 5897, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:07.692103", + "step": 5897, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.75391745264642e-05, + "timestamp": "2025-09-10 02:52:07.694163", + "step": 5898, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:07.746973", + "step": 5898, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006268133409321308, + "timestamp": "2025-09-10 02:52:07.749173", + "step": 5899, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:07.802796", + "step": 5899, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002908637106884271, + "timestamp": "2025-09-10 02:52:07.808926", + "step": 5900, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:07.862815", + "step": 5900, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019580138905439526, + "timestamp": "2025-09-10 02:52:07.869841", + "step": 5901, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:52:07.923100", + "step": 5901, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005077117239125073, + "timestamp": "2025-09-10 02:52:07.925125", + "step": 5902, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:07.978674", + "step": 5902, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008855224587023258, + "timestamp": "2025-09-10 02:52:07.980861", + "step": 5903, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:08.033648", + "step": 5903, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028505848604254425, + "timestamp": "2025-09-10 02:52:08.039857", + "step": 5904, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:08.092268", + "step": 5904, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020344446238595992, + "timestamp": "2025-09-10 02:52:08.102285", + "step": 5905, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:08.155900", + "step": 5905, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.506937981816009e-05, + "timestamp": "2025-09-10 02:52:08.157942", + "step": 5906, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:52:08.216684", + "step": 5906, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001403413130901754, + "timestamp": "2025-09-10 02:52:08.227107", + "step": 5907, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:08.280004", + "step": 5907, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032349787652492523, + "timestamp": "2025-09-10 02:52:08.285877", + "step": 5908, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 624 + ], + "flops": 12480075828672.0 + }, + "timestamp": "2025-09-10 02:52:08.374380", + "step": 5908, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011857036733999848, + "timestamp": "2025-09-10 02:52:08.393369", + "step": 5909, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:08.447821", + "step": 5909, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016681969282217324, + "timestamp": "2025-09-10 02:52:08.454867", + "step": 5910, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:08.508893", + "step": 5910, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009027948835864663, + "timestamp": "2025-09-10 02:52:08.511575", + "step": 5911, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:52:08.577929", + "step": 5911, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007573505281470716, + "timestamp": "2025-09-10 02:52:08.590941", + "step": 5912, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:08.645856", + "step": 5912, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008393190801143646, + "timestamp": "2025-09-10 02:52:08.647978", + "step": 5913, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:08.702176", + "step": 5913, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008536014938727021, + "timestamp": "2025-09-10 02:52:08.704366", + "step": 5914, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:52:08.763158", + "step": 5914, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0439261794090271, + "timestamp": "2025-09-10 02:52:08.773605", + "step": 5915, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:52:08.846260", + "step": 5915, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011814313475042582, + "timestamp": "2025-09-10 02:52:08.860489", + "step": 5916, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:52:08.921010", + "step": 5916, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.090380753041245e-05, + "timestamp": "2025-09-10 02:52:08.933005", + "step": 5917, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:08.986309", + "step": 5917, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017138940165750682, + "timestamp": "2025-09-10 02:52:08.988682", + "step": 5918, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:09.042107", + "step": 5918, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01822705566883087, + "timestamp": "2025-09-10 02:52:09.051736", + "step": 5919, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:09.105232", + "step": 5919, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010350747033953667, + "timestamp": "2025-09-10 02:52:09.113830", + "step": 5920, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:09.167079", + "step": 5920, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017399441450834274, + "timestamp": "2025-09-10 02:52:09.174906", + "step": 5921, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:09.228229", + "step": 5921, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032641488360241055, + "timestamp": "2025-09-10 02:52:09.237881", + "step": 5922, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:09.291607", + "step": 5922, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014491185720544308, + "timestamp": "2025-09-10 02:52:09.294051", + "step": 5923, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:09.347443", + "step": 5923, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000394363421946764, + "timestamp": "2025-09-10 02:52:09.353898", + "step": 5924, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:09.406558", + "step": 5924, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008359685307368636, + "timestamp": "2025-09-10 02:52:09.412980", + "step": 5925, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:09.466159", + "step": 5925, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016688613686710596, + "timestamp": "2025-09-10 02:52:09.468323", + "step": 5926, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:09.521687", + "step": 5926, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005174115300178528, + "timestamp": "2025-09-10 02:52:09.523924", + "step": 5927, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:09.576650", + "step": 5927, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0033108533825725317, + "timestamp": "2025-09-10 02:52:09.582673", + "step": 5928, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:09.634968", + "step": 5928, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003466351772658527, + "timestamp": "2025-09-10 02:52:09.641489", + "step": 5929, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:09.694949", + "step": 5929, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012293375039007515, + "timestamp": "2025-09-10 02:52:09.701451", + "step": 5930, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:09.755155", + "step": 5930, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008823301759548485, + "timestamp": "2025-09-10 02:52:09.757458", + "step": 5931, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:09.811106", + "step": 5931, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021824785508215427, + "timestamp": "2025-09-10 02:52:09.817250", + "step": 5932, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:09.870171", + "step": 5932, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024673170410096645, + "timestamp": "2025-09-10 02:52:09.872249", + "step": 5933, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:09.925172", + "step": 5933, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.033712733536958694, + "timestamp": "2025-09-10 02:52:09.927398", + "step": 5934, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:52:09.979801", + "step": 5934, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021428009495139122, + "timestamp": "2025-09-10 02:52:09.982214", + "step": 5935, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:10.034957", + "step": 5935, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.840552381007001e-05, + "timestamp": "2025-09-10 02:52:10.040982", + "step": 5936, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:10.093895", + "step": 5936, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015409817569889128, + "timestamp": "2025-09-10 02:52:10.096123", + "step": 5937, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:10.149800", + "step": 5937, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022064820223022252, + "timestamp": "2025-09-10 02:52:10.159398", + "step": 5938, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:10.212968", + "step": 5938, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.024093538522720337, + "timestamp": "2025-09-10 02:52:10.215126", + "step": 5939, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:10.269070", + "step": 5939, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002549313474446535, + "timestamp": "2025-09-10 02:52:10.279477", + "step": 5940, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:10.333409", + "step": 5940, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023986550513654947, + "timestamp": "2025-09-10 02:52:10.335801", + "step": 5941, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:10.388750", + "step": 5941, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015712468302808702, + "timestamp": "2025-09-10 02:52:10.390808", + "step": 5942, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:10.444093", + "step": 5942, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009599327109754086, + "timestamp": "2025-09-10 02:52:10.450488", + "step": 5943, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:10.504262", + "step": 5943, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.971248851390556e-05, + "timestamp": "2025-09-10 02:52:10.510418", + "step": 5944, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:10.562856", + "step": 5944, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002541661378927529, + "timestamp": "2025-09-10 02:52:10.565007", + "step": 5945, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:10.618683", + "step": 5945, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.932009611977264e-05, + "timestamp": "2025-09-10 02:52:10.628325", + "step": 5946, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:10.681668", + "step": 5946, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011786666000261903, + "timestamp": "2025-09-10 02:52:10.683687", + "step": 5947, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:10.736058", + "step": 5947, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002877658698707819, + "timestamp": "2025-09-10 02:52:10.742013", + "step": 5948, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:10.794233", + "step": 5948, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009586106170900166, + "timestamp": "2025-09-10 02:52:10.796394", + "step": 5949, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:10.849752", + "step": 5949, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015108101069927216, + "timestamp": "2025-09-10 02:52:10.852091", + "step": 5950, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:10.905616", + "step": 5950, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005361175863072276, + "timestamp": "2025-09-10 02:52:10.911780", + "step": 5951, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:10.966166", + "step": 5951, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019740124698728323, + "timestamp": "2025-09-10 02:52:10.976576", + "step": 5952, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:11.030773", + "step": 5952, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002478157985024154, + "timestamp": "2025-09-10 02:52:11.038570", + "step": 5953, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:11.092652", + "step": 5953, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.527694545686245e-05, + "timestamp": "2025-09-10 02:52:11.095013", + "step": 5954, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:11.148521", + "step": 5954, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005762215587310493, + "timestamp": "2025-09-10 02:52:11.151148", + "step": 5955, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:11.204443", + "step": 5955, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024336830247193575, + "timestamp": "2025-09-10 02:52:11.210646", + "step": 5956, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:11.263810", + "step": 5956, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02025148645043373, + "timestamp": "2025-09-10 02:52:11.265966", + "step": 5957, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:11.320089", + "step": 5957, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.025399362668395042, + "timestamp": "2025-09-10 02:52:11.322691", + "step": 5958, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:11.376963", + "step": 5958, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003289018932264298, + "timestamp": "2025-09-10 02:52:11.379232", + "step": 5959, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:11.432533", + "step": 5959, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01882443018257618, + "timestamp": "2025-09-10 02:52:11.438943", + "step": 5960, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:11.492175", + "step": 5960, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.023095743730664253, + "timestamp": "2025-09-10 02:52:11.494214", + "step": 5961, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:11.548047", + "step": 5961, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015766258584335446, + "timestamp": "2025-09-10 02:52:11.550568", + "step": 5962, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:11.604030", + "step": 5962, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06276575475931168, + "timestamp": "2025-09-10 02:52:11.606321", + "step": 5963, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:52:11.673075", + "step": 5963, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.02222202764824e-05, + "timestamp": "2025-09-10 02:52:11.686062", + "step": 5964, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:11.739255", + "step": 5964, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008802754455246031, + "timestamp": "2025-09-10 02:52:11.745086", + "step": 5965, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:11.799297", + "step": 5965, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012759133824147284, + "timestamp": "2025-09-10 02:52:11.801462", + "step": 5966, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:11.855357", + "step": 5966, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03568331152200699, + "timestamp": "2025-09-10 02:52:11.864997", + "step": 5967, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:11.918734", + "step": 5967, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021555619314312935, + "timestamp": "2025-09-10 02:52:11.925376", + "step": 5968, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:11.978247", + "step": 5968, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001967688003787771, + "timestamp": "2025-09-10 02:52:11.980602", + "step": 5969, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:12.033457", + "step": 5969, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020397835760377347, + "timestamp": "2025-09-10 02:52:12.035790", + "step": 5970, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:12.089352", + "step": 5970, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021871724165976048, + "timestamp": "2025-09-10 02:52:12.091909", + "step": 5971, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:12.145801", + "step": 5971, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004574107937514782, + "timestamp": "2025-09-10 02:52:12.152128", + "step": 5972, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:12.205074", + "step": 5972, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011556023673620075, + "timestamp": "2025-09-10 02:52:12.207348", + "step": 5973, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:12.260684", + "step": 5973, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012694244273006916, + "timestamp": "2025-09-10 02:52:12.263412", + "step": 5974, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:52:12.322239", + "step": 5974, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004663243016693741, + "timestamp": "2025-09-10 02:52:12.332688", + "step": 5975, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:52:12.401041", + "step": 5975, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03924744203686714, + "timestamp": "2025-09-10 02:52:12.415190", + "step": 5976, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:12.468799", + "step": 5976, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029189076740294695, + "timestamp": "2025-09-10 02:52:12.476530", + "step": 5977, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:12.530058", + "step": 5977, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002337169134989381, + "timestamp": "2025-09-10 02:52:12.532428", + "step": 5978, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:12.585744", + "step": 5978, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005500641651451588, + "timestamp": "2025-09-10 02:52:12.587886", + "step": 5979, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:12.641443", + "step": 5979, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004958459176123142, + "timestamp": "2025-09-10 02:52:12.649982", + "step": 5980, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:12.703633", + "step": 5980, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035466270055621862, + "timestamp": "2025-09-10 02:52:12.714099", + "step": 5981, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:12.767696", + "step": 5981, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018062000162899494, + "timestamp": "2025-09-10 02:52:12.773590", + "step": 5982, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:12.828086", + "step": 5982, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0318772979080677, + "timestamp": "2025-09-10 02:52:12.837889", + "step": 5983, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:12.891557", + "step": 5983, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030142883770167828, + "timestamp": "2025-09-10 02:52:12.897803", + "step": 5984, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:12.950298", + "step": 5984, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011745187221094966, + "timestamp": "2025-09-10 02:52:12.958362", + "step": 5985, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:52:13.033789", + "step": 5985, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000578440201934427, + "timestamp": "2025-09-10 02:52:13.047827", + "step": 5986, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:52:13.114802", + "step": 5986, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010196411749348044, + "timestamp": "2025-09-10 02:52:13.127017", + "step": 5987, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:13.181487", + "step": 5987, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001312433509156108, + "timestamp": "2025-09-10 02:52:13.187971", + "step": 5988, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:13.241440", + "step": 5988, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018528975488152355, + "timestamp": "2025-09-10 02:52:13.243602", + "step": 5989, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:13.297418", + "step": 5989, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006053309189155698, + "timestamp": "2025-09-10 02:52:13.299695", + "step": 5990, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:13.353490", + "step": 5990, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031967618269845843, + "timestamp": "2025-09-10 02:52:13.355835", + "step": 5991, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:13.410304", + "step": 5991, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006888638716191053, + "timestamp": "2025-09-10 02:52:13.418570", + "step": 5992, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:13.472136", + "step": 5992, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014289746759459376, + "timestamp": "2025-09-10 02:52:13.474448", + "step": 5993, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:13.529333", + "step": 5993, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008395778015255928, + "timestamp": "2025-09-10 02:52:13.531902", + "step": 5994, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:52:13.585790", + "step": 5994, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003958672168664634, + "timestamp": "2025-09-10 02:52:13.588030", + "step": 5995, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:13.642165", + "step": 5995, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026239112485200167, + "timestamp": "2025-09-10 02:52:13.648636", + "step": 5996, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:52:13.708132", + "step": 5996, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00046100522740744054, + "timestamp": "2025-09-10 02:52:13.719871", + "step": 5997, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:13.774716", + "step": 5997, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02448379620909691, + "timestamp": "2025-09-10 02:52:13.777203", + "step": 5998, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:13.832896", + "step": 5998, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010140178492292762, + "timestamp": "2025-09-10 02:52:13.835196", + "step": 5999, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:13.889114", + "step": 5999, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031572221778333187, + "timestamp": "2025-09-10 02:52:13.899407", + "step": 6000, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 6000", + "timestamp": "2025-09-10 02:52:14.404496", + "step": 6000, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:14.467266", + "step": 6000, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017229202785529196, + "timestamp": "2025-09-10 02:52:14.473338", + "step": 6001, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:14.540019", + "step": 6001, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004986428655683994, + "timestamp": "2025-09-10 02:52:14.549806", + "step": 6002, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:52:14.617694", + "step": 6002, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002928458561655134, + "timestamp": "2025-09-10 02:52:14.628120", + "step": 6003, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:14.695981", + "step": 6003, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00286271795630455, + "timestamp": "2025-09-10 02:52:14.703850", + "step": 6004, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:14.766304", + "step": 6004, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004062298103235662, + "timestamp": "2025-09-10 02:52:14.768662", + "step": 6005, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:14.837279", + "step": 6005, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000835696526337415, + "timestamp": "2025-09-10 02:52:14.840448", + "step": 6006, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:14.905463", + "step": 6006, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014434696640819311, + "timestamp": "2025-09-10 02:52:14.911984", + "step": 6007, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:14.974790", + "step": 6007, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001262914971448481, + "timestamp": "2025-09-10 02:52:14.983796", + "step": 6008, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:52:15.046599", + "step": 6008, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002870490134228021, + "timestamp": "2025-09-10 02:52:15.049247", + "step": 6009, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:15.120134", + "step": 6009, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019455747678875923, + "timestamp": "2025-09-10 02:52:15.123237", + "step": 6010, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:52:15.193030", + "step": 6010, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009880803991109133, + "timestamp": "2025-09-10 02:52:15.203904", + "step": 6011, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:15.257809", + "step": 6011, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006967498338781297, + "timestamp": "2025-09-10 02:52:15.266504", + "step": 6012, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:15.320326", + "step": 6012, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006020539440214634, + "timestamp": "2025-09-10 02:52:15.322726", + "step": 6013, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:15.375817", + "step": 6013, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047595458454452455, + "timestamp": "2025-09-10 02:52:15.377903", + "step": 6014, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:15.431920", + "step": 6014, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002407153369858861, + "timestamp": "2025-09-10 02:52:15.441480", + "step": 6015, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:52:15.514101", + "step": 6015, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002565657254308462, + "timestamp": "2025-09-10 02:52:15.528337", + "step": 6016, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:15.580579", + "step": 6016, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006355735007673502, + "timestamp": "2025-09-10 02:52:15.582905", + "step": 6017, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:15.635325", + "step": 6017, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008355194586329162, + "timestamp": "2025-09-10 02:52:15.637474", + "step": 6018, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:52:15.705707", + "step": 6018, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021186231169849634, + "timestamp": "2025-09-10 02:52:15.718319", + "step": 6019, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:52:15.770813", + "step": 6019, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009638232295401394, + "timestamp": "2025-09-10 02:52:15.776854", + "step": 6020, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:15.830068", + "step": 6020, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010094494791701436, + "timestamp": "2025-09-10 02:52:15.836023", + "step": 6021, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:52:15.894317", + "step": 6021, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005834107869304717, + "timestamp": "2025-09-10 02:52:15.904730", + "step": 6022, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:15.958784", + "step": 6022, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023633132223039865, + "timestamp": "2025-09-10 02:52:15.961649", + "step": 6023, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:16.014832", + "step": 6023, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012404838344082236, + "timestamp": "2025-09-10 02:52:16.020849", + "step": 6024, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:16.073197", + "step": 6024, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00593190360814333, + "timestamp": "2025-09-10 02:52:16.076246", + "step": 6025, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:16.129130", + "step": 6025, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0059447018429636955, + "timestamp": "2025-09-10 02:52:16.135654", + "step": 6026, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:16.188272", + "step": 6026, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013655134243890643, + "timestamp": "2025-09-10 02:52:16.196537", + "step": 6027, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:52:33.029849", + "step": 6027, + "epoch": 3 + }, + { + "type": "pplx", + "content": 23118717.744525813, + "timestamp": "2025-09-10 02:52:33.034640", + "step": 6027, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:33.091655", + "step": 6027, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0071800448931753635, + "timestamp": "2025-09-10 02:52:33.099957", + "step": 6028, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:33.161885", + "step": 6028, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0048779817298054695, + "timestamp": "2025-09-10 02:52:33.163931", + "step": 6029, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:33.226906", + "step": 6029, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011815342586487532, + "timestamp": "2025-09-10 02:52:33.235156", + "step": 6030, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:52:33.296963", + "step": 6030, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008699423633515835, + "timestamp": "2025-09-10 02:52:33.307821", + "step": 6031, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:52:33.376889", + "step": 6031, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011699561728164554, + "timestamp": "2025-09-10 02:52:33.390372", + "step": 6032, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:33.443969", + "step": 6032, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005398035864345729, + "timestamp": "2025-09-10 02:52:33.445942", + "step": 6033, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:33.500888", + "step": 6033, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015815923688933253, + "timestamp": "2025-09-10 02:52:33.510619", + "step": 6034, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:33.564357", + "step": 6034, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007847676984965801, + "timestamp": "2025-09-10 02:52:33.566428", + "step": 6035, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:33.624295", + "step": 6035, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0027494707610458136, + "timestamp": "2025-09-10 02:52:33.630433", + "step": 6036, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:33.683950", + "step": 6036, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002524003619328141, + "timestamp": "2025-09-10 02:52:33.694448", + "step": 6037, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:52:33.752773", + "step": 6037, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002844104077666998, + "timestamp": "2025-09-10 02:52:33.763186", + "step": 6038, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:33.819637", + "step": 6038, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008749146945774555, + "timestamp": "2025-09-10 02:52:33.829227", + "step": 6039, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:33.883108", + "step": 6039, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008817720226943493, + "timestamp": "2025-09-10 02:52:33.892137", + "step": 6040, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:52:33.949163", + "step": 6040, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006424154853448272, + "timestamp": "2025-09-10 02:52:33.960270", + "step": 6041, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:34.013199", + "step": 6041, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006086348555982113, + "timestamp": "2025-09-10 02:52:34.015916", + "step": 6042, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:34.069368", + "step": 6042, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001611717278137803, + "timestamp": "2025-09-10 02:52:34.071485", + "step": 6043, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:34.126223", + "step": 6043, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015470280777662992, + "timestamp": "2025-09-10 02:52:34.136639", + "step": 6044, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:52:34.194054", + "step": 6044, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002855368424206972, + "timestamp": "2025-09-10 02:52:34.205318", + "step": 6045, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:34.258058", + "step": 6045, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017798944609239697, + "timestamp": "2025-09-10 02:52:34.260366", + "step": 6046, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:34.313960", + "step": 6046, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015405418816953897, + "timestamp": "2025-09-10 02:52:34.323593", + "step": 6047, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:34.376921", + "step": 6047, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010371079668402672, + "timestamp": "2025-09-10 02:52:34.386069", + "step": 6048, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:52:34.444595", + "step": 6048, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012742745457217097, + "timestamp": "2025-09-10 02:52:34.456164", + "step": 6049, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:52:34.514344", + "step": 6049, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007821349427103996, + "timestamp": "2025-09-10 02:52:34.524768", + "step": 6050, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:34.578624", + "step": 6050, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004469706676900387, + "timestamp": "2025-09-10 02:52:34.581018", + "step": 6051, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:34.633914", + "step": 6051, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021879347041249275, + "timestamp": "2025-09-10 02:52:34.639943", + "step": 6052, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:34.692381", + "step": 6052, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008522382704541087, + "timestamp": "2025-09-10 02:52:34.694703", + "step": 6053, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:34.748527", + "step": 6053, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008947536698542535, + "timestamp": "2025-09-10 02:52:34.758122", + "step": 6054, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:34.811428", + "step": 6054, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014520528493449092, + "timestamp": "2025-09-10 02:52:34.817891", + "step": 6055, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:34.873736", + "step": 6055, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011400927178328857, + "timestamp": "2025-09-10 02:52:34.881001", + "step": 6056, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:34.933627", + "step": 6056, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031033085542730987, + "timestamp": "2025-09-10 02:52:34.935873", + "step": 6057, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:34.989128", + "step": 6057, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0033830462489277124, + "timestamp": "2025-09-10 02:52:34.998760", + "step": 6058, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:35.051626", + "step": 6058, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006093700067140162, + "timestamp": "2025-09-10 02:52:35.053723", + "step": 6059, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:35.106339", + "step": 6059, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004717320844065398, + "timestamp": "2025-09-10 02:52:35.115245", + "step": 6060, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:35.167772", + "step": 6060, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.661592775955796e-05, + "timestamp": "2025-09-10 02:52:35.170011", + "step": 6061, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:35.222954", + "step": 6061, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013968811836093664, + "timestamp": "2025-09-10 02:52:35.225171", + "step": 6062, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:35.278304", + "step": 6062, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.858936780598015e-05, + "timestamp": "2025-09-10 02:52:35.284924", + "step": 6063, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:35.337706", + "step": 6063, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001150972326286137, + "timestamp": "2025-09-10 02:52:35.343669", + "step": 6064, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:35.397131", + "step": 6064, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000846659007947892, + "timestamp": "2025-09-10 02:52:35.407595", + "step": 6065, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:52:35.469264", + "step": 6065, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001177606056444347, + "timestamp": "2025-09-10 02:52:35.480154", + "step": 6066, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:35.535306", + "step": 6066, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000570034550037235, + "timestamp": "2025-09-10 02:52:35.545170", + "step": 6067, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:35.598010", + "step": 6067, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030715492903254926, + "timestamp": "2025-09-10 02:52:35.605428", + "step": 6068, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:52:35.657816", + "step": 6068, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012023419549223036, + "timestamp": "2025-09-10 02:52:35.659973", + "step": 6069, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:35.712542", + "step": 6069, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0075134942308068275, + "timestamp": "2025-09-10 02:52:35.714654", + "step": 6070, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:52:35.774872", + "step": 6070, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048709503607824445, + "timestamp": "2025-09-10 02:52:35.785815", + "step": 6071, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:35.839061", + "step": 6071, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.048846156161744e-05, + "timestamp": "2025-09-10 02:52:35.845092", + "step": 6072, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:35.898252", + "step": 6072, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000885853951331228, + "timestamp": "2025-09-10 02:52:35.908755", + "step": 6073, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:35.962978", + "step": 6073, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009545084903948009, + "timestamp": "2025-09-10 02:52:35.965336", + "step": 6074, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:36.018839", + "step": 6074, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015305677661672235, + "timestamp": "2025-09-10 02:52:36.025045", + "step": 6075, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:36.079706", + "step": 6075, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011637846939265728, + "timestamp": "2025-09-10 02:52:36.090305", + "step": 6076, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:52:36.149404", + "step": 6076, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004362560866866261, + "timestamp": "2025-09-10 02:52:36.160887", + "step": 6077, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:36.214008", + "step": 6077, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.658708298346028e-05, + "timestamp": "2025-09-10 02:52:36.220357", + "step": 6078, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:52:36.281583", + "step": 6078, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001787340734153986, + "timestamp": "2025-09-10 02:52:36.292470", + "step": 6079, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:36.346165", + "step": 6079, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000420929427491501, + "timestamp": "2025-09-10 02:52:36.352251", + "step": 6080, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:36.404865", + "step": 6080, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015851258067414165, + "timestamp": "2025-09-10 02:52:36.407167", + "step": 6081, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:36.459981", + "step": 6081, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.020229142159223557, + "timestamp": "2025-09-10 02:52:36.466589", + "step": 6082, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:36.520007", + "step": 6082, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032248589559458196, + "timestamp": "2025-09-10 02:52:36.522254", + "step": 6083, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:52:36.580299", + "step": 6083, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0052845412865281105, + "timestamp": "2025-09-10 02:52:36.591538", + "step": 6084, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:36.644196", + "step": 6084, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017015948833432049, + "timestamp": "2025-09-10 02:52:36.650489", + "step": 6085, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:52:36.708430", + "step": 6085, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02619732730090618, + "timestamp": "2025-09-10 02:52:36.718925", + "step": 6086, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:36.773419", + "step": 6086, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.551394158625044e-05, + "timestamp": "2025-09-10 02:52:36.775673", + "step": 6087, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:36.829168", + "step": 6087, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001303064200328663, + "timestamp": "2025-09-10 02:52:36.835538", + "step": 6088, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:36.888503", + "step": 6088, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016703010769560933, + "timestamp": "2025-09-10 02:52:36.891325", + "step": 6089, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:36.944701", + "step": 6089, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005203016102313995, + "timestamp": "2025-09-10 02:52:36.952785", + "step": 6090, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:52:37.006194", + "step": 6090, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.93345622898778e-06, + "timestamp": "2025-09-10 02:52:37.008457", + "step": 6091, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:37.061654", + "step": 6091, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001508936838945374, + "timestamp": "2025-09-10 02:52:37.068932", + "step": 6092, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:52:37.121674", + "step": 6092, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010944458335870877, + "timestamp": "2025-09-10 02:52:37.124877", + "step": 6093, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 624 + ], + "flops": 12480075828672.0 + }, + "timestamp": "2025-09-10 02:52:37.216274", + "step": 6093, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003187777940183878, + "timestamp": "2025-09-10 02:52:37.233627", + "step": 6094, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:37.287810", + "step": 6094, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022569263819605112, + "timestamp": "2025-09-10 02:52:37.290166", + "step": 6095, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:37.343523", + "step": 6095, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011813950550276786, + "timestamp": "2025-09-10 02:52:37.349881", + "step": 6096, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:52:37.421815", + "step": 6096, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008416800992563367, + "timestamp": "2025-09-10 02:52:37.436731", + "step": 6097, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:37.491553", + "step": 6097, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006296943174675107, + "timestamp": "2025-09-10 02:52:37.501323", + "step": 6098, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:37.554742", + "step": 6098, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.272789738024585e-05, + "timestamp": "2025-09-10 02:52:37.557080", + "step": 6099, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:37.610619", + "step": 6099, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005077628302387893, + "timestamp": "2025-09-10 02:52:37.622175", + "step": 6100, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:37.683574", + "step": 6100, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012766268628183752, + "timestamp": "2025-09-10 02:52:37.691599", + "step": 6101, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:37.750681", + "step": 6101, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014411035226657987, + "timestamp": "2025-09-10 02:52:37.758605", + "step": 6102, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:37.813848", + "step": 6102, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021063178777694702, + "timestamp": "2025-09-10 02:52:37.821372", + "step": 6103, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:37.877240", + "step": 6103, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005084304721094668, + "timestamp": "2025-09-10 02:52:37.888506", + "step": 6104, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:37.944235", + "step": 6104, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013107885024510324, + "timestamp": "2025-09-10 02:52:37.948948", + "step": 6105, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:52:38.002955", + "step": 6105, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000524270290043205, + "timestamp": "2025-09-10 02:52:38.011025", + "step": 6106, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:38.071444", + "step": 6106, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.484802335966378e-05, + "timestamp": "2025-09-10 02:52:38.077778", + "step": 6107, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:38.138325", + "step": 6107, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005922123673371971, + "timestamp": "2025-09-10 02:52:38.149608", + "step": 6108, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:52:38.219329", + "step": 6108, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012583202624227852, + "timestamp": "2025-09-10 02:52:38.233053", + "step": 6109, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:38.296866", + "step": 6109, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004160685057286173, + "timestamp": "2025-09-10 02:52:38.303387", + "step": 6110, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:38.361019", + "step": 6110, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.073786451248452e-05, + "timestamp": "2025-09-10 02:52:38.369975", + "step": 6111, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:38.427110", + "step": 6111, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006909678224474192, + "timestamp": "2025-09-10 02:52:38.441625", + "step": 6112, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:38.498147", + "step": 6112, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013595778727903962, + "timestamp": "2025-09-10 02:52:38.503891", + "step": 6113, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:52:38.566533", + "step": 6113, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021880402346141636, + "timestamp": "2025-09-10 02:52:38.568890", + "step": 6114, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:38.622010", + "step": 6114, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.544974894495681e-05, + "timestamp": "2025-09-10 02:52:38.624398", + "step": 6115, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:38.677335", + "step": 6115, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015257038467098027, + "timestamp": "2025-09-10 02:52:38.683295", + "step": 6116, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:38.735885", + "step": 6116, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020383484661579132, + "timestamp": "2025-09-10 02:52:38.742417", + "step": 6117, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:38.795880", + "step": 6117, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005315456073731184, + "timestamp": "2025-09-10 02:52:38.797802", + "step": 6118, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 12160073886080.0 + }, + "timestamp": "2025-09-10 02:52:38.887778", + "step": 6118, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006385329179465771, + "timestamp": "2025-09-10 02:52:38.904884", + "step": 6119, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:38.958727", + "step": 6119, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001608455495443195, + "timestamp": "2025-09-10 02:52:38.964591", + "step": 6120, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:39.016779", + "step": 6120, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000821257010102272, + "timestamp": "2025-09-10 02:52:39.019025", + "step": 6121, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:39.071908", + "step": 6121, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008781217620708048, + "timestamp": "2025-09-10 02:52:39.074294", + "step": 6122, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:39.127230", + "step": 6122, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06590049713850021, + "timestamp": "2025-09-10 02:52:39.129374", + "step": 6123, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:39.181928", + "step": 6123, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000752792228013277, + "timestamp": "2025-09-10 02:52:39.187886", + "step": 6124, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:39.240005", + "step": 6124, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007797020371071994, + "timestamp": "2025-09-10 02:52:39.243124", + "step": 6125, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:39.295849", + "step": 6125, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019136367365717888, + "timestamp": "2025-09-10 02:52:39.297999", + "step": 6126, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:39.350506", + "step": 6126, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003150348784402013, + "timestamp": "2025-09-10 02:52:39.352990", + "step": 6127, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:39.405640", + "step": 6127, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.519495506538078e-05, + "timestamp": "2025-09-10 02:52:39.411555", + "step": 6128, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:39.463716", + "step": 6128, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013283970474731177, + "timestamp": "2025-09-10 02:52:39.466819", + "step": 6129, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:39.520533", + "step": 6129, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014158491103444248, + "timestamp": "2025-09-10 02:52:39.530120", + "step": 6130, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:39.582677", + "step": 6130, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024900168646126986, + "timestamp": "2025-09-10 02:52:39.584971", + "step": 6131, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:39.637806", + "step": 6131, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032622383441776037, + "timestamp": "2025-09-10 02:52:39.643743", + "step": 6132, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:39.696496", + "step": 6132, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009876827243715525, + "timestamp": "2025-09-10 02:52:39.704817", + "step": 6133, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:39.758051", + "step": 6133, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007712595164775848, + "timestamp": "2025-09-10 02:52:39.760395", + "step": 6134, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:39.813640", + "step": 6134, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016517074254807085, + "timestamp": "2025-09-10 02:52:39.816522", + "step": 6135, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:39.870908", + "step": 6135, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018742047250270844, + "timestamp": "2025-09-10 02:52:39.877045", + "step": 6136, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:39.929047", + "step": 6136, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007885963656008244, + "timestamp": "2025-09-10 02:52:39.931751", + "step": 6137, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:39.984069", + "step": 6137, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021347992587834597, + "timestamp": "2025-09-10 02:52:39.986332", + "step": 6138, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:40.038624", + "step": 6138, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.2866826839162968e-05, + "timestamp": "2025-09-10 02:52:40.040959", + "step": 6139, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:40.093467", + "step": 6139, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012924431823194027, + "timestamp": "2025-09-10 02:52:40.099380", + "step": 6140, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:40.151493", + "step": 6140, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019553520251065493, + "timestamp": "2025-09-10 02:52:40.161783", + "step": 6141, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:40.214871", + "step": 6141, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001370320824207738, + "timestamp": "2025-09-10 02:52:40.217711", + "step": 6142, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:40.270040", + "step": 6142, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04433509334921837, + "timestamp": "2025-09-10 02:52:40.272115", + "step": 6143, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:40.324490", + "step": 6143, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002048466121777892, + "timestamp": "2025-09-10 02:52:40.330441", + "step": 6144, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 13120079713856.0 + }, + "timestamp": "2025-09-10 02:52:40.425013", + "step": 6144, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015178490430116653, + "timestamp": "2025-09-10 02:52:40.445400", + "step": 6145, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:40.499151", + "step": 6145, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003375701140612364, + "timestamp": "2025-09-10 02:52:40.501437", + "step": 6146, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:40.554507", + "step": 6146, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025541919749230146, + "timestamp": "2025-09-10 02:52:40.556869", + "step": 6147, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:40.609949", + "step": 6147, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01113084889948368, + "timestamp": "2025-09-10 02:52:40.615844", + "step": 6148, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:40.667782", + "step": 6148, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007157630170695484, + "timestamp": "2025-09-10 02:52:40.676089", + "step": 6149, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:40.730330", + "step": 6149, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001018314273096621, + "timestamp": "2025-09-10 02:52:40.736073", + "step": 6150, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:40.789959", + "step": 6150, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005166871007531881, + "timestamp": "2025-09-10 02:52:40.792412", + "step": 6151, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:52:40.845983", + "step": 6151, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041923296521417797, + "timestamp": "2025-09-10 02:52:40.852033", + "step": 6152, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:40.904415", + "step": 6152, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003797454119194299, + "timestamp": "2025-09-10 02:52:40.907826", + "step": 6153, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:40.963443", + "step": 6153, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042816068162210286, + "timestamp": "2025-09-10 02:52:40.969984", + "step": 6154, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:41.023183", + "step": 6154, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011409942817408592, + "timestamp": "2025-09-10 02:52:41.031330", + "step": 6155, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:41.084030", + "step": 6155, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017229224322363734, + "timestamp": "2025-09-10 02:52:41.089742", + "step": 6156, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:41.142200", + "step": 6156, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0062713404186069965, + "timestamp": "2025-09-10 02:52:41.144209", + "step": 6157, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:41.196937", + "step": 6157, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001340866001555696, + "timestamp": "2025-09-10 02:52:41.199862", + "step": 6158, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:41.253181", + "step": 6158, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.393969331635162e-05, + "timestamp": "2025-09-10 02:52:41.255202", + "step": 6159, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:41.308061", + "step": 6159, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.729629528010264e-05, + "timestamp": "2025-09-10 02:52:41.313673", + "step": 6160, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:41.366465", + "step": 6160, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006829385529272258, + "timestamp": "2025-09-10 02:52:41.372808", + "step": 6161, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:52:41.441058", + "step": 6161, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006158491596579552, + "timestamp": "2025-09-10 02:52:41.453641", + "step": 6162, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:41.508485", + "step": 6162, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023279429296962917, + "timestamp": "2025-09-10 02:52:41.518297", + "step": 6163, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:41.571240", + "step": 6163, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.347532275365666e-05, + "timestamp": "2025-09-10 02:52:41.578531", + "step": 6164, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:41.630714", + "step": 6164, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.7901072826352902e-05, + "timestamp": "2025-09-10 02:52:41.633032", + "step": 6165, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:52:41.686104", + "step": 6165, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012794139795005322, + "timestamp": "2025-09-10 02:52:41.691960", + "step": 6166, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:41.745007", + "step": 6166, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001425377675332129, + "timestamp": "2025-09-10 02:52:41.747110", + "step": 6167, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:41.801138", + "step": 6167, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006850698264315724, + "timestamp": "2025-09-10 02:52:41.811718", + "step": 6168, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:41.863998", + "step": 6168, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008233123226091266, + "timestamp": "2025-09-10 02:52:41.872276", + "step": 6169, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:41.925347", + "step": 6169, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009910735534504056, + "timestamp": "2025-09-10 02:52:41.927443", + "step": 6170, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:41.980059", + "step": 6170, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.992994455387816e-05, + "timestamp": "2025-09-10 02:52:41.988358", + "step": 6171, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:42.041808", + "step": 6171, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030609042732976377, + "timestamp": "2025-09-10 02:52:42.047515", + "step": 6172, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:52:42.099885", + "step": 6172, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014566489262506366, + "timestamp": "2025-09-10 02:52:42.101949", + "step": 6173, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:52:42.154572", + "step": 6173, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04792408272624016, + "timestamp": "2025-09-10 02:52:42.156780", + "step": 6174, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:52:58.917402", + "step": 6174, + "epoch": 3 + }, + { + "type": "pplx", + "content": 23046639.44568271, + "timestamp": "2025-09-10 02:52:58.920389", + "step": 6174, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:58.975360", + "step": 6174, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.4728592911269516e-05, + "timestamp": "2025-09-10 02:52:58.977422", + "step": 6175, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:52:59.031208", + "step": 6175, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04376506805419922, + "timestamp": "2025-09-10 02:52:59.037573", + "step": 6176, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:59.090501", + "step": 6176, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014188756234943867, + "timestamp": "2025-09-10 02:52:59.098370", + "step": 6177, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:59.151951", + "step": 6177, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001620362512767315, + "timestamp": "2025-09-10 02:52:59.154136", + "step": 6178, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:52:59.207337", + "step": 6178, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014629093930125237, + "timestamp": "2025-09-10 02:52:59.209550", + "step": 6179, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:52:59.283324", + "step": 6179, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013443955220282078, + "timestamp": "2025-09-10 02:52:59.297689", + "step": 6180, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:52:59.363087", + "step": 6180, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00608267355710268, + "timestamp": "2025-09-10 02:52:59.376293", + "step": 6181, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:59.431820", + "step": 6181, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004723257734440267, + "timestamp": "2025-09-10 02:52:59.433872", + "step": 6182, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:59.488613", + "step": 6182, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003553885966539383, + "timestamp": "2025-09-10 02:52:59.498409", + "step": 6183, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:52:59.570970", + "step": 6183, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032707207719795406, + "timestamp": "2025-09-10 02:52:59.585171", + "step": 6184, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:52:59.638281", + "step": 6184, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009438624256290495, + "timestamp": "2025-09-10 02:52:59.640664", + "step": 6185, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 8960054460160.0 + }, + "timestamp": "2025-09-10 02:52:59.710178", + "step": 6185, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011728469689842314, + "timestamp": "2025-09-10 02:52:59.723074", + "step": 6186, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:52:59.775687", + "step": 6186, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.939744293456897e-05, + "timestamp": "2025-09-10 02:52:59.777850", + "step": 6187, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:52:59.830557", + "step": 6187, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003583366284146905, + "timestamp": "2025-09-10 02:52:59.839431", + "step": 6188, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:52:59.892898", + "step": 6188, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005416512722149491, + "timestamp": "2025-09-10 02:52:59.903411", + "step": 6189, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:52:59.956642", + "step": 6189, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003713883925229311, + "timestamp": "2025-09-10 02:52:59.966238", + "step": 6190, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:00.019785", + "step": 6190, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013095579342916608, + "timestamp": "2025-09-10 02:53:00.022060", + "step": 6191, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:00.075655", + "step": 6191, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002630103554110974, + "timestamp": "2025-09-10 02:53:00.086062", + "step": 6192, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:00.138766", + "step": 6192, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.038398325676098e-05, + "timestamp": "2025-09-10 02:53:00.141078", + "step": 6193, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:00.195668", + "step": 6193, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.434372385730967e-05, + "timestamp": "2025-09-10 02:53:00.204408", + "step": 6194, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:00.259315", + "step": 6194, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019118929049000144, + "timestamp": "2025-09-10 02:53:00.266362", + "step": 6195, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:00.320393", + "step": 6195, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012861691357102245, + "timestamp": "2025-09-10 02:53:00.328667", + "step": 6196, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:00.381785", + "step": 6196, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030544851324521005, + "timestamp": "2025-09-10 02:53:00.392245", + "step": 6197, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:00.445490", + "step": 6197, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.9226237782277167e-05, + "timestamp": "2025-09-10 02:53:00.447579", + "step": 6198, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:00.500104", + "step": 6198, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008024958078749478, + "timestamp": "2025-09-10 02:53:00.502260", + "step": 6199, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:00.554976", + "step": 6199, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034939402248710394, + "timestamp": "2025-09-10 02:53:00.560709", + "step": 6200, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:00.613012", + "step": 6200, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.811767186969519e-05, + "timestamp": "2025-09-10 02:53:00.615179", + "step": 6201, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:00.667571", + "step": 6201, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001254458911716938, + "timestamp": "2025-09-10 02:53:00.669881", + "step": 6202, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:00.722231", + "step": 6202, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003831215144600719, + "timestamp": "2025-09-10 02:53:00.724446", + "step": 6203, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:53:00.785840", + "step": 6203, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031643040711060166, + "timestamp": "2025-09-10 02:53:00.797682", + "step": 6204, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:53:00.849974", + "step": 6204, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032409251434728503, + "timestamp": "2025-09-10 02:53:00.852215", + "step": 6205, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:00.905670", + "step": 6205, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001230636116815731, + "timestamp": "2025-09-10 02:53:00.908127", + "step": 6206, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:00.963175", + "step": 6206, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018245227402076125, + "timestamp": "2025-09-10 02:53:00.970219", + "step": 6207, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:01.024014", + "step": 6207, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019986736588180065, + "timestamp": "2025-09-10 02:53:01.034881", + "step": 6208, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:01.106681", + "step": 6208, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011568238958716393, + "timestamp": "2025-09-10 02:53:01.108790", + "step": 6209, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:01.161380", + "step": 6209, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005507374298758805, + "timestamp": "2025-09-10 02:53:01.167824", + "step": 6210, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:01.221538", + "step": 6210, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015149227692745626, + "timestamp": "2025-09-10 02:53:01.228806", + "step": 6211, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:01.282112", + "step": 6211, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010124669643118978, + "timestamp": "2025-09-10 02:53:01.287997", + "step": 6212, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:01.339946", + "step": 6212, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008196182898245752, + "timestamp": "2025-09-10 02:53:01.342211", + "step": 6213, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:01.395159", + "step": 6213, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002756024769041687, + "timestamp": "2025-09-10 02:53:01.397477", + "step": 6214, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:01.450322", + "step": 6214, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004791795217897743, + "timestamp": "2025-09-10 02:53:01.452531", + "step": 6215, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:01.505607", + "step": 6215, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031861786264926195, + "timestamp": "2025-09-10 02:53:01.512718", + "step": 6216, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:01.568430", + "step": 6216, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003523991326801479, + "timestamp": "2025-09-10 02:53:01.571664", + "step": 6217, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:53:01.650088", + "step": 6217, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004075986216776073, + "timestamp": "2025-09-10 02:53:01.663784", + "step": 6218, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:01.717659", + "step": 6218, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006375530268996954, + "timestamp": "2025-09-10 02:53:01.720051", + "step": 6219, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:01.773376", + "step": 6219, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011905818246304989, + "timestamp": "2025-09-10 02:53:01.783459", + "step": 6220, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:01.839031", + "step": 6220, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004494747263379395, + "timestamp": "2025-09-10 02:53:01.850885", + "step": 6221, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:53:01.928899", + "step": 6221, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004530021105892956, + "timestamp": "2025-09-10 02:53:01.941599", + "step": 6222, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:01.995143", + "step": 6222, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012486240302678198, + "timestamp": "2025-09-10 02:53:02.002780", + "step": 6223, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:02.055840", + "step": 6223, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.643323624506593e-05, + "timestamp": "2025-09-10 02:53:02.062247", + "step": 6224, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:02.114696", + "step": 6224, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001208492394653149, + "timestamp": "2025-09-10 02:53:02.116949", + "step": 6225, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:02.170164", + "step": 6225, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007338562281802297, + "timestamp": "2025-09-10 02:53:02.172395", + "step": 6226, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:02.225545", + "step": 6226, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000566408911254257, + "timestamp": "2025-09-10 02:53:02.227698", + "step": 6227, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:02.280600", + "step": 6227, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.674916949123144e-05, + "timestamp": "2025-09-10 02:53:02.286642", + "step": 6228, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:02.339094", + "step": 6228, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.1840201295854058e-05, + "timestamp": "2025-09-10 02:53:02.341216", + "step": 6229, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:02.394610", + "step": 6229, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003923589829355478, + "timestamp": "2025-09-10 02:53:02.404153", + "step": 6230, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:02.457174", + "step": 6230, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002102156635373831, + "timestamp": "2025-09-10 02:53:02.459212", + "step": 6231, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:02.512156", + "step": 6231, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001509518246166408, + "timestamp": "2025-09-10 02:53:02.519341", + "step": 6232, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:02.572362", + "step": 6232, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005916491500101984, + "timestamp": "2025-09-10 02:53:02.582913", + "step": 6233, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:02.636330", + "step": 6233, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018602547061163932, + "timestamp": "2025-09-10 02:53:02.638529", + "step": 6234, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:02.691303", + "step": 6234, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012755133211612701, + "timestamp": "2025-09-10 02:53:02.694084", + "step": 6235, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:02.746816", + "step": 6235, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001339509035460651, + "timestamp": "2025-09-10 02:53:02.752995", + "step": 6236, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:02.805209", + "step": 6236, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006676294142380357, + "timestamp": "2025-09-10 02:53:02.807389", + "step": 6237, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:02.860172", + "step": 6237, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004006302042398602, + "timestamp": "2025-09-10 02:53:02.862501", + "step": 6238, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:02.914776", + "step": 6238, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003464436740614474, + "timestamp": "2025-09-10 02:53:02.917875", + "step": 6239, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:53:02.969826", + "step": 6239, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.9572183090494946e-05, + "timestamp": "2025-09-10 02:53:02.975666", + "step": 6240, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:53:03.041667", + "step": 6240, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002842635731212795, + "timestamp": "2025-09-10 02:53:03.055304", + "step": 6241, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:03.109434", + "step": 6241, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018574048590380698, + "timestamp": "2025-09-10 02:53:03.119077", + "step": 6242, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:03.172206", + "step": 6242, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011604089377215132, + "timestamp": "2025-09-10 02:53:03.174686", + "step": 6243, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:03.227367", + "step": 6243, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001450964919058606, + "timestamp": "2025-09-10 02:53:03.233449", + "step": 6244, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:03.285304", + "step": 6244, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018526332860346884, + "timestamp": "2025-09-10 02:53:03.293507", + "step": 6245, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:03.346214", + "step": 6245, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004118310462217778, + "timestamp": "2025-09-10 02:53:03.348356", + "step": 6246, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:53:03.421194", + "step": 6246, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007664974313229322, + "timestamp": "2025-09-10 02:53:03.434895", + "step": 6247, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:03.488222", + "step": 6247, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003324569552205503, + "timestamp": "2025-09-10 02:53:03.495376", + "step": 6248, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:03.548297", + "step": 6248, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048176193377003074, + "timestamp": "2025-09-10 02:53:03.550739", + "step": 6249, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:03.603778", + "step": 6249, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005551331560127437, + "timestamp": "2025-09-10 02:53:03.605936", + "step": 6250, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:03.658680", + "step": 6250, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.801698353025131e-05, + "timestamp": "2025-09-10 02:53:03.661033", + "step": 6251, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:03.713967", + "step": 6251, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002592813689261675, + "timestamp": "2025-09-10 02:53:03.719856", + "step": 6252, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:03.772900", + "step": 6252, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.2752126167470124e-05, + "timestamp": "2025-09-10 02:53:03.779066", + "step": 6253, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:03.831613", + "step": 6253, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01028306595981121, + "timestamp": "2025-09-10 02:53:03.834685", + "step": 6254, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:03.887555", + "step": 6254, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025878133601509035, + "timestamp": "2025-09-10 02:53:03.889693", + "step": 6255, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:03.942123", + "step": 6255, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0040932828560471535, + "timestamp": "2025-09-10 02:53:03.951090", + "step": 6256, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:04.003196", + "step": 6256, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.455898255808279e-05, + "timestamp": "2025-09-10 02:53:04.005309", + "step": 6257, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:04.057743", + "step": 6257, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.057732076034881e-05, + "timestamp": "2025-09-10 02:53:04.064247", + "step": 6258, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:53:04.123765", + "step": 6258, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015628700202796608, + "timestamp": "2025-09-10 02:53:04.134435", + "step": 6259, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:04.186888", + "step": 6259, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005173964309506118, + "timestamp": "2025-09-10 02:53:04.192542", + "step": 6260, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:04.244198", + "step": 6260, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.169626820133999e-05, + "timestamp": "2025-09-10 02:53:04.246429", + "step": 6261, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:04.298955", + "step": 6261, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009875959949567914, + "timestamp": "2025-09-10 02:53:04.305529", + "step": 6262, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:04.358412", + "step": 6262, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002259409084217623, + "timestamp": "2025-09-10 02:53:04.361274", + "step": 6263, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:04.414309", + "step": 6263, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031470516114495695, + "timestamp": "2025-09-10 02:53:04.419886", + "step": 6264, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:04.472004", + "step": 6264, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017533283680677414, + "timestamp": "2025-09-10 02:53:04.480395", + "step": 6265, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:04.532855", + "step": 6265, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000321357452776283, + "timestamp": "2025-09-10 02:53:04.535149", + "step": 6266, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:04.588209", + "step": 6266, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00523083982989192, + "timestamp": "2025-09-10 02:53:04.590480", + "step": 6267, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:04.642916", + "step": 6267, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013569157454185188, + "timestamp": "2025-09-10 02:53:04.648568", + "step": 6268, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:04.700953", + "step": 6268, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020350891281850636, + "timestamp": "2025-09-10 02:53:04.702936", + "step": 6269, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:04.757287", + "step": 6269, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001151436663349159, + "timestamp": "2025-09-10 02:53:04.767099", + "step": 6270, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:04.819819", + "step": 6270, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.023529188707470894, + "timestamp": "2025-09-10 02:53:04.822871", + "step": 6271, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:04.875785", + "step": 6271, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004282921727281064, + "timestamp": "2025-09-10 02:53:04.881725", + "step": 6272, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:04.933531", + "step": 6272, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014876218046993017, + "timestamp": "2025-09-10 02:53:04.936929", + "step": 6273, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:04.991898", + "step": 6273, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011225293565075845, + "timestamp": "2025-09-10 02:53:04.994084", + "step": 6274, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:05.047754", + "step": 6274, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041951602906920016, + "timestamp": "2025-09-10 02:53:05.057366", + "step": 6275, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:05.111449", + "step": 6275, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.044321394758299e-05, + "timestamp": "2025-09-10 02:53:05.122026", + "step": 6276, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:05.174318", + "step": 6276, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010085922986036167, + "timestamp": "2025-09-10 02:53:05.176543", + "step": 6277, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:05.238544", + "step": 6277, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008098947000689805, + "timestamp": "2025-09-10 02:53:05.240764", + "step": 6278, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:05.293203", + "step": 6278, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.599256736692041e-05, + "timestamp": "2025-09-10 02:53:05.295465", + "step": 6279, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:05.347769", + "step": 6279, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0563594289124012, + "timestamp": "2025-09-10 02:53:05.353446", + "step": 6280, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:05.405618", + "step": 6280, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002962738217320293, + "timestamp": "2025-09-10 02:53:05.413830", + "step": 6281, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:05.465885", + "step": 6281, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004825623764190823, + "timestamp": "2025-09-10 02:53:05.468861", + "step": 6282, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:53:05.527686", + "step": 6282, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011310545960441232, + "timestamp": "2025-09-10 02:53:05.538120", + "step": 6283, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:05.591285", + "step": 6283, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003833844675682485, + "timestamp": "2025-09-10 02:53:05.596925", + "step": 6284, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:05.649436", + "step": 6284, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005850231973454356, + "timestamp": "2025-09-10 02:53:05.652407", + "step": 6285, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:53:05.712310", + "step": 6285, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.5168409997131675e-05, + "timestamp": "2025-09-10 02:53:05.723059", + "step": 6286, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:05.777002", + "step": 6286, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.450110908597708e-05, + "timestamp": "2025-09-10 02:53:05.786562", + "step": 6287, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:05.838940", + "step": 6287, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016273361688945442, + "timestamp": "2025-09-10 02:53:05.844711", + "step": 6288, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:05.897167", + "step": 6288, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005180092412047088, + "timestamp": "2025-09-10 02:53:05.899303", + "step": 6289, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:05.952277", + "step": 6289, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006385315791703761, + "timestamp": "2025-09-10 02:53:05.954673", + "step": 6290, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:06.007065", + "step": 6290, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004625410947483033, + "timestamp": "2025-09-10 02:53:06.009201", + "step": 6291, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:06.062111", + "step": 6291, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010765146726043895, + "timestamp": "2025-09-10 02:53:06.068031", + "step": 6292, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:53:06.119909", + "step": 6292, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.746909250272438e-05, + "timestamp": "2025-09-10 02:53:06.122161", + "step": 6293, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:06.175118", + "step": 6293, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010182967409491539, + "timestamp": "2025-09-10 02:53:06.177405", + "step": 6294, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:06.230576", + "step": 6294, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006712442263960838, + "timestamp": "2025-09-10 02:53:06.232795", + "step": 6295, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:06.285477", + "step": 6295, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.2704712541308254e-05, + "timestamp": "2025-09-10 02:53:06.291072", + "step": 6296, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:06.343390", + "step": 6296, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001639856054680422, + "timestamp": "2025-09-10 02:53:06.353560", + "step": 6297, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:06.407937", + "step": 6297, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001479656930314377, + "timestamp": "2025-09-10 02:53:06.410341", + "step": 6298, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:06.463333", + "step": 6298, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004781940020620823, + "timestamp": "2025-09-10 02:53:06.471491", + "step": 6299, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:06.526139", + "step": 6299, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015522715693805367, + "timestamp": "2025-09-10 02:53:06.536718", + "step": 6300, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:06.588779", + "step": 6300, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.8653279767022468e-05, + "timestamp": "2025-09-10 02:53:06.590965", + "step": 6301, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:06.644052", + "step": 6301, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.126219861675054e-05, + "timestamp": "2025-09-10 02:53:06.650663", + "step": 6302, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:06.703848", + "step": 6302, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.5612262106733397e-05, + "timestamp": "2025-09-10 02:53:06.705910", + "step": 6303, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:06.758360", + "step": 6303, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001806333748390898, + "timestamp": "2025-09-10 02:53:06.767425", + "step": 6304, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:06.820217", + "step": 6304, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003336466324981302, + "timestamp": "2025-09-10 02:53:06.826672", + "step": 6305, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:06.879599", + "step": 6305, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016676145605742931, + "timestamp": "2025-09-10 02:53:06.882670", + "step": 6306, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:53:06.934997", + "step": 6306, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.678514182567596e-05, + "timestamp": "2025-09-10 02:53:06.936930", + "step": 6307, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:06.990430", + "step": 6307, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012330238241702318, + "timestamp": "2025-09-10 02:53:07.000810", + "step": 6308, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:07.053197", + "step": 6308, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002929836919065565, + "timestamp": "2025-09-10 02:53:07.055382", + "step": 6309, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:07.107926", + "step": 6309, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.920703834912274e-05, + "timestamp": "2025-09-10 02:53:07.109969", + "step": 6310, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:07.162517", + "step": 6310, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000312409974867478, + "timestamp": "2025-09-10 02:53:07.169109", + "step": 6311, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:07.221887", + "step": 6311, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.205670019378886e-05, + "timestamp": "2025-09-10 02:53:07.229275", + "step": 6312, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:07.281961", + "step": 6312, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.2824809497687966e-05, + "timestamp": "2025-09-10 02:53:07.284138", + "step": 6313, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:53:07.341893", + "step": 6313, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.1424997865105979e-05, + "timestamp": "2025-09-10 02:53:07.352294", + "step": 6314, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:07.405279", + "step": 6314, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000568813644349575, + "timestamp": "2025-09-10 02:53:07.407585", + "step": 6315, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:07.461177", + "step": 6315, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018300736555829644, + "timestamp": "2025-09-10 02:53:07.471584", + "step": 6316, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:07.523811", + "step": 6316, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003173965960741043, + "timestamp": "2025-09-10 02:53:07.526998", + "step": 6317, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:07.581613", + "step": 6317, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005713038146495819, + "timestamp": "2025-09-10 02:53:07.591375", + "step": 6318, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:07.643924", + "step": 6318, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014197020209394395, + "timestamp": "2025-09-10 02:53:07.646081", + "step": 6319, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:07.698785", + "step": 6319, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000491217419039458, + "timestamp": "2025-09-10 02:53:07.704615", + "step": 6320, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:07.757127", + "step": 6320, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001874455832876265, + "timestamp": "2025-09-10 02:53:07.759410", + "step": 6321, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:53:24.635358", + "step": 6321, + "epoch": 3 + }, + { + "type": "pplx", + "content": 25623746.72768853, + "timestamp": "2025-09-10 02:53:24.640638", + "step": 6321, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:24.697537", + "step": 6321, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002497486711945385, + "timestamp": "2025-09-10 02:53:24.700228", + "step": 6322, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:24.755203", + "step": 6322, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008116251789033413, + "timestamp": "2025-09-10 02:53:24.757226", + "step": 6323, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:24.810884", + "step": 6323, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002907492744270712, + "timestamp": "2025-09-10 02:53:24.817011", + "step": 6324, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:24.873564", + "step": 6324, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.2124964086979162e-05, + "timestamp": "2025-09-10 02:53:24.877496", + "step": 6325, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:24.936907", + "step": 6325, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004172473563812673, + "timestamp": "2025-09-10 02:53:24.944113", + "step": 6326, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:53:25.009368", + "step": 6326, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001568460138514638, + "timestamp": "2025-09-10 02:53:25.020078", + "step": 6327, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:53:25.093776", + "step": 6327, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008111385977827013, + "timestamp": "2025-09-10 02:53:25.107126", + "step": 6328, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:25.160566", + "step": 6328, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007448896765708923, + "timestamp": "2025-09-10 02:53:25.169677", + "step": 6329, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:25.230643", + "step": 6329, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007777568884193897, + "timestamp": "2025-09-10 02:53:25.237213", + "step": 6330, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:25.297291", + "step": 6330, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008014828781597316, + "timestamp": "2025-09-10 02:53:25.299330", + "step": 6331, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:25.352642", + "step": 6331, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011677221627905965, + "timestamp": "2025-09-10 02:53:25.358366", + "step": 6332, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:25.412505", + "step": 6332, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.402061676955782e-05, + "timestamp": "2025-09-10 02:53:25.422514", + "step": 6333, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:25.476141", + "step": 6333, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009365716832689941, + "timestamp": "2025-09-10 02:53:25.478152", + "step": 6334, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:25.533989", + "step": 6334, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011481484398245811, + "timestamp": "2025-09-10 02:53:25.536188", + "step": 6335, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:25.590892", + "step": 6335, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004527546465396881, + "timestamp": "2025-09-10 02:53:25.601474", + "step": 6336, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:25.654330", + "step": 6336, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004685709718614817, + "timestamp": "2025-09-10 02:53:25.657150", + "step": 6337, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:25.709620", + "step": 6337, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.837741238996387e-05, + "timestamp": "2025-09-10 02:53:25.712721", + "step": 6338, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:25.771977", + "step": 6338, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.559287718730047e-05, + "timestamp": "2025-09-10 02:53:25.773975", + "step": 6339, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:25.834825", + "step": 6339, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011402172822272405, + "timestamp": "2025-09-10 02:53:25.845211", + "step": 6340, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:25.897487", + "step": 6340, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.443385063903406e-05, + "timestamp": "2025-09-10 02:53:25.900114", + "step": 6341, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:25.957717", + "step": 6341, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013485607923939824, + "timestamp": "2025-09-10 02:53:25.959881", + "step": 6342, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:26.012811", + "step": 6342, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014597535482607782, + "timestamp": "2025-09-10 02:53:26.014930", + "step": 6343, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:26.067972", + "step": 6343, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.369337304728106e-05, + "timestamp": "2025-09-10 02:53:26.075075", + "step": 6344, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:26.142656", + "step": 6344, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011516348604345694, + "timestamp": "2025-09-10 02:53:26.144965", + "step": 6345, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:26.198266", + "step": 6345, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037112042773514986, + "timestamp": "2025-09-10 02:53:26.204679", + "step": 6346, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:26.263888", + "step": 6346, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006334986537694931, + "timestamp": "2025-09-10 02:53:26.266074", + "step": 6347, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:26.320396", + "step": 6347, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020315279834903777, + "timestamp": "2025-09-10 02:53:26.327730", + "step": 6348, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:26.380702", + "step": 6348, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004242732655256987, + "timestamp": "2025-09-10 02:53:26.382852", + "step": 6349, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:26.435996", + "step": 6349, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.986769272363745e-05, + "timestamp": "2025-09-10 02:53:26.439838", + "step": 6350, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:26.495395", + "step": 6350, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.9768902095383964e-05, + "timestamp": "2025-09-10 02:53:26.498188", + "step": 6351, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:26.551085", + "step": 6351, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03486521542072296, + "timestamp": "2025-09-10 02:53:26.557009", + "step": 6352, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:26.609023", + "step": 6352, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04174187034368515, + "timestamp": "2025-09-10 02:53:26.611012", + "step": 6353, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:26.663773", + "step": 6353, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.407457552384585e-05, + "timestamp": "2025-09-10 02:53:26.666597", + "step": 6354, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:26.719374", + "step": 6354, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010306506737833843, + "timestamp": "2025-09-10 02:53:26.725986", + "step": 6355, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:26.779139", + "step": 6355, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.052934506646125e-06, + "timestamp": "2025-09-10 02:53:26.784981", + "step": 6356, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:26.837355", + "step": 6356, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034755567321553826, + "timestamp": "2025-09-10 02:53:26.847498", + "step": 6357, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:26.901942", + "step": 6357, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005519238184206188, + "timestamp": "2025-09-10 02:53:26.904167", + "step": 6358, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:26.957952", + "step": 6358, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001442223001504317, + "timestamp": "2025-09-10 02:53:26.960120", + "step": 6359, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:27.013551", + "step": 6359, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010180289391428232, + "timestamp": "2025-09-10 02:53:27.019719", + "step": 6360, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:27.072664", + "step": 6360, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.5201082684798166e-05, + "timestamp": "2025-09-10 02:53:27.075135", + "step": 6361, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:53:27.135535", + "step": 6361, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007746697519905865, + "timestamp": "2025-09-10 02:53:27.146216", + "step": 6362, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:27.199097", + "step": 6362, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001340774237178266, + "timestamp": "2025-09-10 02:53:27.201454", + "step": 6363, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:27.254708", + "step": 6363, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010940327309072018, + "timestamp": "2025-09-10 02:53:27.260583", + "step": 6364, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:27.312905", + "step": 6364, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010309758363291621, + "timestamp": "2025-09-10 02:53:27.315026", + "step": 6365, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:27.368257", + "step": 6365, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00046365702291950583, + "timestamp": "2025-09-10 02:53:27.370284", + "step": 6366, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:53:27.437055", + "step": 6366, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015972986875567585, + "timestamp": "2025-09-10 02:53:27.449287", + "step": 6367, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:27.503891", + "step": 6367, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.685276508098468e-05, + "timestamp": "2025-09-10 02:53:27.510120", + "step": 6368, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:27.563084", + "step": 6368, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005840769503265619, + "timestamp": "2025-09-10 02:53:27.565305", + "step": 6369, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:27.618784", + "step": 6369, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017197491833940148, + "timestamp": "2025-09-10 02:53:27.620963", + "step": 6370, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:53:27.687177", + "step": 6370, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002981850178912282, + "timestamp": "2025-09-10 02:53:27.699372", + "step": 6371, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:53:27.772787", + "step": 6371, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013454955769702792, + "timestamp": "2025-09-10 02:53:27.787064", + "step": 6372, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:27.839441", + "step": 6372, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002641402243170887, + "timestamp": "2025-09-10 02:53:27.841856", + "step": 6373, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:27.894835", + "step": 6373, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016312948719132692, + "timestamp": "2025-09-10 02:53:27.897001", + "step": 6374, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:27.951591", + "step": 6374, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.924271368305199e-05, + "timestamp": "2025-09-10 02:53:27.961414", + "step": 6375, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:28.016772", + "step": 6375, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.935371442930773e-05, + "timestamp": "2025-09-10 02:53:28.027356", + "step": 6376, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 9920060287936.0 + }, + "timestamp": "2025-09-10 02:53:28.099814", + "step": 6376, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05012155696749687, + "timestamp": "2025-09-10 02:53:28.114986", + "step": 6377, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:28.169273", + "step": 6377, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021866110910195857, + "timestamp": "2025-09-10 02:53:28.179067", + "step": 6378, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:28.232060", + "step": 6378, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001106784911826253, + "timestamp": "2025-09-10 02:53:28.234420", + "step": 6379, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:28.287853", + "step": 6379, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042406810098327696, + "timestamp": "2025-09-10 02:53:28.295534", + "step": 6380, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:28.347633", + "step": 6380, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03518061712384224, + "timestamp": "2025-09-10 02:53:28.349983", + "step": 6381, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:28.403934", + "step": 6381, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012866409088019282, + "timestamp": "2025-09-10 02:53:28.413555", + "step": 6382, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:28.467180", + "step": 6382, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006021680892445147, + "timestamp": "2025-09-10 02:53:28.475339", + "step": 6383, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:28.528635", + "step": 6383, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0162067711353302, + "timestamp": "2025-09-10 02:53:28.534324", + "step": 6384, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:28.586867", + "step": 6384, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0058546424843370914, + "timestamp": "2025-09-10 02:53:28.588984", + "step": 6385, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:28.641703", + "step": 6385, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.4167162589728832e-05, + "timestamp": "2025-09-10 02:53:28.644861", + "step": 6386, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:28.697981", + "step": 6386, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.715995550621301e-05, + "timestamp": "2025-09-10 02:53:28.700252", + "step": 6387, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:28.753283", + "step": 6387, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.2044809156795964e-05, + "timestamp": "2025-09-10 02:53:28.760741", + "step": 6388, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:28.813041", + "step": 6388, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.692918810178526e-05, + "timestamp": "2025-09-10 02:53:28.815466", + "step": 6389, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:28.869528", + "step": 6389, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005919909570366144, + "timestamp": "2025-09-10 02:53:28.879132", + "step": 6390, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:28.932292", + "step": 6390, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008166728657670319, + "timestamp": "2025-09-10 02:53:28.934496", + "step": 6391, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:28.987320", + "step": 6391, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.9261107303900644e-05, + "timestamp": "2025-09-10 02:53:28.993042", + "step": 6392, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:29.046072", + "step": 6392, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018909344216808677, + "timestamp": "2025-09-10 02:53:29.054344", + "step": 6393, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:53:29.123712", + "step": 6393, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.440822810167447e-05, + "timestamp": "2025-09-10 02:53:29.136390", + "step": 6394, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:29.189592", + "step": 6394, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010338453284930438, + "timestamp": "2025-09-10 02:53:29.191832", + "step": 6395, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:53:29.253495", + "step": 6395, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003287374565843493, + "timestamp": "2025-09-10 02:53:29.265360", + "step": 6396, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:29.317765", + "step": 6396, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018978974549099803, + "timestamp": "2025-09-10 02:53:29.319847", + "step": 6397, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:53:29.380904", + "step": 6397, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.915746467304416e-05, + "timestamp": "2025-09-10 02:53:29.391849", + "step": 6398, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:29.445234", + "step": 6398, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015465223987121135, + "timestamp": "2025-09-10 02:53:29.448456", + "step": 6399, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:29.501559", + "step": 6399, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007834541029296815, + "timestamp": "2025-09-10 02:53:29.507833", + "step": 6400, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:29.562621", + "step": 6400, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.472758180578239e-05, + "timestamp": "2025-09-10 02:53:29.564847", + "step": 6401, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:53:29.622952", + "step": 6401, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008757556788623333, + "timestamp": "2025-09-10 02:53:29.633326", + "step": 6402, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:29.687339", + "step": 6402, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021163642406463623, + "timestamp": "2025-09-10 02:53:29.697008", + "step": 6403, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:29.755274", + "step": 6403, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.233611151808873e-05, + "timestamp": "2025-09-10 02:53:29.762698", + "step": 6404, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:29.816742", + "step": 6404, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006255786283873022, + "timestamp": "2025-09-10 02:53:29.819122", + "step": 6405, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:29.873873", + "step": 6405, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029943903791718185, + "timestamp": "2025-09-10 02:53:29.876168", + "step": 6406, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:29.930397", + "step": 6406, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017056668002624065, + "timestamp": "2025-09-10 02:53:29.934987", + "step": 6407, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:29.990577", + "step": 6407, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011894209455931559, + "timestamp": "2025-09-10 02:53:29.996387", + "step": 6408, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:30.048421", + "step": 6408, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.540851270779967e-05, + "timestamp": "2025-09-10 02:53:30.056743", + "step": 6409, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:30.110212", + "step": 6409, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.922144530108199e-05, + "timestamp": "2025-09-10 02:53:30.116870", + "step": 6410, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:30.171852", + "step": 6410, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003103779745288193, + "timestamp": "2025-09-10 02:53:30.180928", + "step": 6411, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:30.237944", + "step": 6411, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043122057104483247, + "timestamp": "2025-09-10 02:53:30.244966", + "step": 6412, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:53:30.306461", + "step": 6412, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034825937473215163, + "timestamp": "2025-09-10 02:53:30.318002", + "step": 6413, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:53:30.386908", + "step": 6413, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000860480242408812, + "timestamp": "2025-09-10 02:53:30.399437", + "step": 6414, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:30.453621", + "step": 6414, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005650802631862462, + "timestamp": "2025-09-10 02:53:30.455818", + "step": 6415, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:30.508995", + "step": 6415, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009750666213221848, + "timestamp": "2025-09-10 02:53:30.515270", + "step": 6416, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:53:30.572175", + "step": 6416, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016369502991437912, + "timestamp": "2025-09-10 02:53:30.583389", + "step": 6417, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:30.637005", + "step": 6417, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.572590064024553e-05, + "timestamp": "2025-09-10 02:53:30.643479", + "step": 6418, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:30.697282", + "step": 6418, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.367561268736608e-05, + "timestamp": "2025-09-10 02:53:30.703600", + "step": 6419, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:30.757429", + "step": 6419, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013939932687208056, + "timestamp": "2025-09-10 02:53:30.763611", + "step": 6420, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:30.819989", + "step": 6420, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001453318545827642, + "timestamp": "2025-09-10 02:53:30.822218", + "step": 6421, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:30.876569", + "step": 6421, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007501809159293771, + "timestamp": "2025-09-10 02:53:30.886363", + "step": 6422, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:30.939690", + "step": 6422, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007267541368491948, + "timestamp": "2025-09-10 02:53:30.941753", + "step": 6423, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:30.994705", + "step": 6423, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015470579965040088, + "timestamp": "2025-09-10 02:53:31.001906", + "step": 6424, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:31.054506", + "step": 6424, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04970637708902359, + "timestamp": "2025-09-10 02:53:31.056778", + "step": 6425, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:31.109457", + "step": 6425, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000580643187277019, + "timestamp": "2025-09-10 02:53:31.111694", + "step": 6426, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:53:31.171715", + "step": 6426, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001214796444401145, + "timestamp": "2025-09-10 02:53:31.182445", + "step": 6427, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:31.235524", + "step": 6427, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005449260352179408, + "timestamp": "2025-09-10 02:53:31.241164", + "step": 6428, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:31.293458", + "step": 6428, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018538626318331808, + "timestamp": "2025-09-10 02:53:31.295517", + "step": 6429, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:31.348071", + "step": 6429, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002393820323050022, + "timestamp": "2025-09-10 02:53:31.351277", + "step": 6430, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:31.404812", + "step": 6430, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010630719771143049, + "timestamp": "2025-09-10 02:53:31.406983", + "step": 6431, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:53:31.487130", + "step": 6431, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002240373141830787, + "timestamp": "2025-09-10 02:53:31.502992", + "step": 6432, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:31.555977", + "step": 6432, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007287138141691685, + "timestamp": "2025-09-10 02:53:31.558189", + "step": 6433, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:31.612876", + "step": 6433, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.3637529264087789e-05, + "timestamp": "2025-09-10 02:53:31.622673", + "step": 6434, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:53:31.674781", + "step": 6434, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012591794366016984, + "timestamp": "2025-09-10 02:53:31.676868", + "step": 6435, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:31.729778", + "step": 6435, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013732012594118714, + "timestamp": "2025-09-10 02:53:31.738829", + "step": 6436, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:31.791486", + "step": 6436, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03234048932790756, + "timestamp": "2025-09-10 02:53:31.793562", + "step": 6437, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:53:31.855184", + "step": 6437, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011566213797777891, + "timestamp": "2025-09-10 02:53:31.866266", + "step": 6438, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:31.919785", + "step": 6438, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04029754176735878, + "timestamp": "2025-09-10 02:53:31.926311", + "step": 6439, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:31.979264", + "step": 6439, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011032062582671642, + "timestamp": "2025-09-10 02:53:31.986131", + "step": 6440, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:53:32.046013", + "step": 6440, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.85177076573018e-05, + "timestamp": "2025-09-10 02:53:32.057572", + "step": 6441, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:32.110386", + "step": 6441, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.043669749051332474, + "timestamp": "2025-09-10 02:53:32.113633", + "step": 6442, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:32.166240", + "step": 6442, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.031538255512714386, + "timestamp": "2025-09-10 02:53:32.168513", + "step": 6443, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:32.221582", + "step": 6443, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021350120368879288, + "timestamp": "2025-09-10 02:53:32.227113", + "step": 6444, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:32.279363", + "step": 6444, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003119015309493989, + "timestamp": "2025-09-10 02:53:32.281470", + "step": 6445, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:32.334068", + "step": 6445, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018298991199117154, + "timestamp": "2025-09-10 02:53:32.336274", + "step": 6446, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:53:32.404626", + "step": 6446, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034401267766952515, + "timestamp": "2025-09-10 02:53:32.417251", + "step": 6447, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:32.470032", + "step": 6447, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014745743246749043, + "timestamp": "2025-09-10 02:53:32.475506", + "step": 6448, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:32.527662", + "step": 6448, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02622535452246666, + "timestamp": "2025-09-10 02:53:32.530742", + "step": 6449, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:32.583908", + "step": 6449, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010449716355651617, + "timestamp": "2025-09-10 02:53:32.586040", + "step": 6450, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:32.639067", + "step": 6450, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003003651276230812, + "timestamp": "2025-09-10 02:53:32.641094", + "step": 6451, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:32.694057", + "step": 6451, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013862905325368047, + "timestamp": "2025-09-10 02:53:32.699713", + "step": 6452, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:53:32.756357", + "step": 6452, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014461170649155974, + "timestamp": "2025-09-10 02:53:32.767540", + "step": 6453, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:32.820991", + "step": 6453, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005308613646775484, + "timestamp": "2025-09-10 02:53:32.823690", + "step": 6454, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:32.876856", + "step": 6454, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005282104248180985, + "timestamp": "2025-09-10 02:53:32.879608", + "step": 6455, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:32.933009", + "step": 6455, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011777032632380724, + "timestamp": "2025-09-10 02:53:32.938615", + "step": 6456, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 12160073886080.0 + }, + "timestamp": "2025-09-10 02:53:33.027976", + "step": 6456, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011136236600577831, + "timestamp": "2025-09-10 02:53:33.046709", + "step": 6457, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:33.100441", + "step": 6457, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016885899007320404, + "timestamp": "2025-09-10 02:53:33.102565", + "step": 6458, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:33.155816", + "step": 6458, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022576468472834677, + "timestamp": "2025-09-10 02:53:33.165371", + "step": 6459, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:33.218626", + "step": 6459, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009160241461358964, + "timestamp": "2025-09-10 02:53:33.224381", + "step": 6460, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:33.277079", + "step": 6460, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003347193996887654, + "timestamp": "2025-09-10 02:53:33.279281", + "step": 6461, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:33.332252", + "step": 6461, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019199053349439055, + "timestamp": "2025-09-10 02:53:33.334564", + "step": 6462, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:33.387522", + "step": 6462, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007015415467321873, + "timestamp": "2025-09-10 02:53:33.389897", + "step": 6463, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:33.443171", + "step": 6463, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020852791203651577, + "timestamp": "2025-09-10 02:53:33.449147", + "step": 6464, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 8960054460160.0 + }, + "timestamp": "2025-09-10 02:53:33.517129", + "step": 6464, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001491163275204599, + "timestamp": "2025-09-10 02:53:33.531061", + "step": 6465, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:53:33.591819", + "step": 6465, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005892434855923057, + "timestamp": "2025-09-10 02:53:33.602549", + "step": 6466, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:33.655691", + "step": 6466, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015197346219792962, + "timestamp": "2025-09-10 02:53:33.661979", + "step": 6467, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:33.715113", + "step": 6467, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009250625967979431, + "timestamp": "2025-09-10 02:53:33.722366", + "step": 6468, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:53:50.834662", + "step": 6468, + "epoch": 3 + }, + { + "type": "pplx", + "content": 23706738.432858456, + "timestamp": "2025-09-10 02:53:50.837225", + "step": 6468, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:50.892499", + "step": 6468, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000517043168656528, + "timestamp": "2025-09-10 02:53:50.894641", + "step": 6469, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:50.948793", + "step": 6469, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028855192940682173, + "timestamp": "2025-09-10 02:53:50.950978", + "step": 6470, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:51.004319", + "step": 6470, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001393310958519578, + "timestamp": "2025-09-10 02:53:51.006881", + "step": 6471, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:51.062842", + "step": 6471, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002278887404827401, + "timestamp": "2025-09-10 02:53:51.069151", + "step": 6472, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:51.121882", + "step": 6472, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002905389526858926, + "timestamp": "2025-09-10 02:53:51.124080", + "step": 6473, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:51.177778", + "step": 6473, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012196651368867606, + "timestamp": "2025-09-10 02:53:51.185662", + "step": 6474, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:51.239664", + "step": 6474, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014588789781555533, + "timestamp": "2025-09-10 02:53:51.241866", + "step": 6475, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:51.297712", + "step": 6475, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007198529201559722, + "timestamp": "2025-09-10 02:53:51.308131", + "step": 6476, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:51.361682", + "step": 6476, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008955709636211395, + "timestamp": "2025-09-10 02:53:51.363975", + "step": 6477, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:51.417302", + "step": 6477, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000317004305543378, + "timestamp": "2025-09-10 02:53:51.423754", + "step": 6478, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:51.479617", + "step": 6478, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010838760063052177, + "timestamp": "2025-09-10 02:53:51.481716", + "step": 6479, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:51.535085", + "step": 6479, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011980189010500908, + "timestamp": "2025-09-10 02:53:51.541155", + "step": 6480, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:51.594604", + "step": 6480, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007219035644084215, + "timestamp": "2025-09-10 02:53:51.600857", + "step": 6481, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:51.653795", + "step": 6481, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030097621493041515, + "timestamp": "2025-09-10 02:53:51.656144", + "step": 6482, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:51.709990", + "step": 6482, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002602596068754792, + "timestamp": "2025-09-10 02:53:51.712035", + "step": 6483, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:51.765790", + "step": 6483, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004644592700060457, + "timestamp": "2025-09-10 02:53:51.771771", + "step": 6484, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:51.825950", + "step": 6484, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019527435069903731, + "timestamp": "2025-09-10 02:53:51.828177", + "step": 6485, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:51.881260", + "step": 6485, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00231377431191504, + "timestamp": "2025-09-10 02:53:51.883713", + "step": 6486, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:51.937940", + "step": 6486, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004013167636003345, + "timestamp": "2025-09-10 02:53:51.947519", + "step": 6487, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:52.001129", + "step": 6487, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018575329158920795, + "timestamp": "2025-09-10 02:53:52.006994", + "step": 6488, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:52.059715", + "step": 6488, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000344709464116022, + "timestamp": "2025-09-10 02:53:52.062815", + "step": 6489, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:52.115937", + "step": 6489, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023524740245193243, + "timestamp": "2025-09-10 02:53:52.117982", + "step": 6490, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:53:52.176429", + "step": 6490, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010268871672451496, + "timestamp": "2025-09-10 02:53:52.186841", + "step": 6491, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:52.242105", + "step": 6491, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018025030149146914, + "timestamp": "2025-09-10 02:53:52.249911", + "step": 6492, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:52.304178", + "step": 6492, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006212440202943981, + "timestamp": "2025-09-10 02:53:52.306466", + "step": 6493, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:52.360400", + "step": 6493, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001148225157521665, + "timestamp": "2025-09-10 02:53:52.368321", + "step": 6494, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:53:52.430393", + "step": 6494, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004987604916095734, + "timestamp": "2025-09-10 02:53:52.441537", + "step": 6495, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:52.496006", + "step": 6495, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006656069308519363, + "timestamp": "2025-09-10 02:53:52.504794", + "step": 6496, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:52.557690", + "step": 6496, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032480843365192413, + "timestamp": "2025-09-10 02:53:52.559975", + "step": 6497, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:53:52.626084", + "step": 6497, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003583710640668869, + "timestamp": "2025-09-10 02:53:52.638311", + "step": 6498, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:52.692064", + "step": 6498, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006013225065544248, + "timestamp": "2025-09-10 02:53:52.700266", + "step": 6499, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:52.753263", + "step": 6499, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00952020101249218, + "timestamp": "2025-09-10 02:53:52.759594", + "step": 6500, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 6500", + "timestamp": "2025-09-10 02:53:53.267713", + "step": 6500, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:53.325965", + "step": 6500, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012291369494050741, + "timestamp": "2025-09-10 02:53:53.328467", + "step": 6501, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:53:53.383556", + "step": 6501, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013674128567799926, + "timestamp": "2025-09-10 02:53:53.385999", + "step": 6502, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:53.439551", + "step": 6502, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005135418614372611, + "timestamp": "2025-09-10 02:53:53.441775", + "step": 6503, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:53.494717", + "step": 6503, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014980064588598907, + "timestamp": "2025-09-10 02:53:53.501293", + "step": 6504, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:53:53.566044", + "step": 6504, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026989339385181665, + "timestamp": "2025-09-10 02:53:53.579276", + "step": 6505, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:53.634602", + "step": 6505, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003520399332046509, + "timestamp": "2025-09-10 02:53:53.640414", + "step": 6506, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:53:53.702245", + "step": 6506, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008410373702645302, + "timestamp": "2025-09-10 02:53:53.713362", + "step": 6507, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:53.767549", + "step": 6507, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025949280243366957, + "timestamp": "2025-09-10 02:53:53.773753", + "step": 6508, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:53.826078", + "step": 6508, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006233364692889154, + "timestamp": "2025-09-10 02:53:53.828218", + "step": 6509, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:53.881248", + "step": 6509, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003180032828822732, + "timestamp": "2025-09-10 02:53:53.883787", + "step": 6510, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:53.937217", + "step": 6510, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014026327698957175, + "timestamp": "2025-09-10 02:53:53.939426", + "step": 6511, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:53.993281", + "step": 6511, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010266329627484083, + "timestamp": "2025-09-10 02:53:53.999728", + "step": 6512, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:54.053923", + "step": 6512, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014553897781297565, + "timestamp": "2025-09-10 02:53:54.056289", + "step": 6513, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:54.111797", + "step": 6513, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036328397691249847, + "timestamp": "2025-09-10 02:53:54.121223", + "step": 6514, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:53:54.180006", + "step": 6514, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004422703292220831, + "timestamp": "2025-09-10 02:53:54.190413", + "step": 6515, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:53:54.249791", + "step": 6515, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021767716389149427, + "timestamp": "2025-09-10 02:53:54.261008", + "step": 6516, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:54.313849", + "step": 6516, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010643379937391728, + "timestamp": "2025-09-10 02:53:54.316118", + "step": 6517, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:54.369395", + "step": 6517, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007238482357934117, + "timestamp": "2025-09-10 02:53:54.371763", + "step": 6518, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:54.425814", + "step": 6518, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008917992934584618, + "timestamp": "2025-09-10 02:53:54.427952", + "step": 6519, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:54.481119", + "step": 6519, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.946386904222891e-05, + "timestamp": "2025-09-10 02:53:54.487176", + "step": 6520, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:53:54.543900", + "step": 6520, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006381779909133911, + "timestamp": "2025-09-10 02:53:54.555105", + "step": 6521, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:54.608324", + "step": 6521, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010170700988965109, + "timestamp": "2025-09-10 02:53:54.610838", + "step": 6522, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:54.664957", + "step": 6522, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005070645362138748, + "timestamp": "2025-09-10 02:53:54.673078", + "step": 6523, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:54.726884", + "step": 6523, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010211608605459332, + "timestamp": "2025-09-10 02:53:54.734117", + "step": 6524, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:53:54.787306", + "step": 6524, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017876082565635443, + "timestamp": "2025-09-10 02:53:54.789586", + "step": 6525, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:54.845344", + "step": 6525, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002271537232445553, + "timestamp": "2025-09-10 02:53:54.854967", + "step": 6526, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:54.909215", + "step": 6526, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008145435713231564, + "timestamp": "2025-09-10 02:53:54.912946", + "step": 6527, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:54.966898", + "step": 6527, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027083768509328365, + "timestamp": "2025-09-10 02:53:54.973772", + "step": 6528, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:55.026720", + "step": 6528, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006037470884621143, + "timestamp": "2025-09-10 02:53:55.036662", + "step": 6529, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:53:55.091448", + "step": 6529, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00046910453238524497, + "timestamp": "2025-09-10 02:53:55.093972", + "step": 6530, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:53:55.155903", + "step": 6530, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018874193483497947, + "timestamp": "2025-09-10 02:53:55.166650", + "step": 6531, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:55.220124", + "step": 6531, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042390087037347257, + "timestamp": "2025-09-10 02:53:55.227516", + "step": 6532, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:55.280902", + "step": 6532, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009650534018874168, + "timestamp": "2025-09-10 02:53:55.283240", + "step": 6533, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:55.336166", + "step": 6533, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010299842106178403, + "timestamp": "2025-09-10 02:53:55.344521", + "step": 6534, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:55.397568", + "step": 6534, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001963161921594292, + "timestamp": "2025-09-10 02:53:55.399901", + "step": 6535, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:53:55.465274", + "step": 6535, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001372817496303469, + "timestamp": "2025-09-10 02:53:55.477188", + "step": 6536, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:55.529969", + "step": 6536, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009262050152756274, + "timestamp": "2025-09-10 02:53:55.532199", + "step": 6537, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:55.586892", + "step": 6537, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005956540699116886, + "timestamp": "2025-09-10 02:53:55.596696", + "step": 6538, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:55.650477", + "step": 6538, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005263627972453833, + "timestamp": "2025-09-10 02:53:55.652928", + "step": 6539, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:55.706309", + "step": 6539, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.020635077729821205, + "timestamp": "2025-09-10 02:53:55.712474", + "step": 6540, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:55.765653", + "step": 6540, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003491123905405402, + "timestamp": "2025-09-10 02:53:55.768325", + "step": 6541, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:53:55.821497", + "step": 6541, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.034317190526053e-05, + "timestamp": "2025-09-10 02:53:55.823854", + "step": 6542, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:55.878186", + "step": 6542, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027987625799141824, + "timestamp": "2025-09-10 02:53:55.880570", + "step": 6543, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:55.935817", + "step": 6543, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014497018128167838, + "timestamp": "2025-09-10 02:53:55.942538", + "step": 6544, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:55.995103", + "step": 6544, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026463859830982983, + "timestamp": "2025-09-10 02:53:55.997910", + "step": 6545, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:56.050998", + "step": 6545, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003814331954345107, + "timestamp": "2025-09-10 02:53:56.053386", + "step": 6546, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:56.106799", + "step": 6546, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011513993376865983, + "timestamp": "2025-09-10 02:53:56.109063", + "step": 6547, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:56.161700", + "step": 6547, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00196033320389688, + "timestamp": "2025-09-10 02:53:56.168003", + "step": 6548, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:56.220985", + "step": 6548, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.029767900705337524, + "timestamp": "2025-09-10 02:53:56.223596", + "step": 6549, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:56.277451", + "step": 6549, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009712413884699345, + "timestamp": "2025-09-10 02:53:56.283325", + "step": 6550, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:56.337370", + "step": 6550, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003727537696249783, + "timestamp": "2025-09-10 02:53:56.339519", + "step": 6551, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:56.393199", + "step": 6551, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031455809948965907, + "timestamp": "2025-09-10 02:53:56.403196", + "step": 6552, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:56.457062", + "step": 6552, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028856462449766695, + "timestamp": "2025-09-10 02:53:56.459268", + "step": 6553, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:53:56.513072", + "step": 6553, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010789121733978391, + "timestamp": "2025-09-10 02:53:56.522698", + "step": 6554, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:56.577708", + "step": 6554, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018292127060703933, + "timestamp": "2025-09-10 02:53:56.580156", + "step": 6555, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:56.634248", + "step": 6555, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044557778164744377, + "timestamp": "2025-09-10 02:53:56.640574", + "step": 6556, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:56.694428", + "step": 6556, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014421871164813638, + "timestamp": "2025-09-10 02:53:56.697109", + "step": 6557, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:56.750746", + "step": 6557, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0059631625190377235, + "timestamp": "2025-09-10 02:53:56.753532", + "step": 6558, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:56.807009", + "step": 6558, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004969359142705798, + "timestamp": "2025-09-10 02:53:56.809563", + "step": 6559, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:56.863096", + "step": 6559, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009079108131118119, + "timestamp": "2025-09-10 02:53:56.869690", + "step": 6560, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:56.922992", + "step": 6560, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012841433635912836, + "timestamp": "2025-09-10 02:53:56.930983", + "step": 6561, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:53:56.992214", + "step": 6561, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000660736404825002, + "timestamp": "2025-09-10 02:53:57.002941", + "step": 6562, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:57.057131", + "step": 6562, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005666794604621828, + "timestamp": "2025-09-10 02:53:57.059532", + "step": 6563, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:57.113473", + "step": 6563, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00102478195913136, + "timestamp": "2025-09-10 02:53:57.119778", + "step": 6564, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:57.172782", + "step": 6564, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013485388830304146, + "timestamp": "2025-09-10 02:53:57.175172", + "step": 6565, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:53:57.244015", + "step": 6565, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001536666095489636, + "timestamp": "2025-09-10 02:53:57.256582", + "step": 6566, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 848 + ], + "flops": 16960103024960.0 + }, + "timestamp": "2025-09-10 02:53:57.379965", + "step": 6566, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012508033541962504, + "timestamp": "2025-09-10 02:53:57.403939", + "step": 6567, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:57.475396", + "step": 6567, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001451898569939658, + "timestamp": "2025-09-10 02:53:57.485998", + "step": 6568, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:57.552281", + "step": 6568, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000552977027837187, + "timestamp": "2025-09-10 02:53:57.555219", + "step": 6569, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:57.620793", + "step": 6569, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020744935318361968, + "timestamp": "2025-09-10 02:53:57.625747", + "step": 6570, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:57.686107", + "step": 6570, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005688023287802935, + "timestamp": "2025-09-10 02:53:57.692337", + "step": 6571, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:57.758584", + "step": 6571, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023701158352196217, + "timestamp": "2025-09-10 02:53:57.766644", + "step": 6572, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:53:57.843154", + "step": 6572, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010350550524890423, + "timestamp": "2025-09-10 02:53:57.854683", + "step": 6573, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:53:57.933713", + "step": 6573, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031393878161907196, + "timestamp": "2025-09-10 02:53:57.944747", + "step": 6574, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:53:58.009012", + "step": 6574, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009653661982156336, + "timestamp": "2025-09-10 02:53:58.014377", + "step": 6575, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:53:58.102683", + "step": 6575, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010989164933562279, + "timestamp": "2025-09-10 02:53:58.117488", + "step": 6576, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:53:58.203536", + "step": 6576, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.298122752923518e-05, + "timestamp": "2025-09-10 02:53:58.217276", + "step": 6577, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:58.279177", + "step": 6577, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017397617921233177, + "timestamp": "2025-09-10 02:53:58.281721", + "step": 6578, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:58.344290", + "step": 6578, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001520134275779128, + "timestamp": "2025-09-10 02:53:58.356292", + "step": 6579, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:58.410398", + "step": 6579, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001435176673112437, + "timestamp": "2025-09-10 02:53:58.416628", + "step": 6580, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:58.469883", + "step": 6580, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003124767099507153, + "timestamp": "2025-09-10 02:53:58.477442", + "step": 6581, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:53:58.530975", + "step": 6581, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003484593238681555, + "timestamp": "2025-09-10 02:53:58.538844", + "step": 6582, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:58.593615", + "step": 6582, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002820561931002885, + "timestamp": "2025-09-10 02:53:58.595866", + "step": 6583, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:53:58.653779", + "step": 6583, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002700116310734302, + "timestamp": "2025-09-10 02:53:58.665025", + "step": 6584, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:58.717934", + "step": 6584, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.787747820955701e-05, + "timestamp": "2025-09-10 02:53:58.720079", + "step": 6585, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:58.773538", + "step": 6585, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011721830815076828, + "timestamp": "2025-09-10 02:53:58.780270", + "step": 6586, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:58.833534", + "step": 6586, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013192297774367034, + "timestamp": "2025-09-10 02:53:58.835936", + "step": 6587, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:53:58.888556", + "step": 6587, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015103282930795103, + "timestamp": "2025-09-10 02:53:58.894623", + "step": 6588, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:53:58.959647", + "step": 6588, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002405633422313258, + "timestamp": "2025-09-10 02:53:58.972898", + "step": 6589, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:53:59.026019", + "step": 6589, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.490548599278554e-05, + "timestamp": "2025-09-10 02:53:59.028228", + "step": 6590, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:59.082896", + "step": 6590, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004959744401276112, + "timestamp": "2025-09-10 02:53:59.092632", + "step": 6591, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:53:59.146471", + "step": 6591, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007809832692146301, + "timestamp": "2025-09-10 02:53:59.153661", + "step": 6592, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:59.206902", + "step": 6592, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008047600276768208, + "timestamp": "2025-09-10 02:53:59.210697", + "step": 6593, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:53:59.279871", + "step": 6593, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016849783423822373, + "timestamp": "2025-09-10 02:53:59.292092", + "step": 6594, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:59.346093", + "step": 6594, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005317169707268476, + "timestamp": "2025-09-10 02:53:59.349891", + "step": 6595, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:59.404111", + "step": 6595, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018430011114105582, + "timestamp": "2025-09-10 02:53:59.414425", + "step": 6596, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:53:59.479311", + "step": 6596, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001944272080436349, + "timestamp": "2025-09-10 02:53:59.490901", + "step": 6597, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:59.556393", + "step": 6597, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018813603674061596, + "timestamp": "2025-09-10 02:53:59.558670", + "step": 6598, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:59.621189", + "step": 6598, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002565057249739766, + "timestamp": "2025-09-10 02:53:59.629532", + "step": 6599, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:53:59.692916", + "step": 6599, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016462865751236677, + "timestamp": "2025-09-10 02:53:59.703519", + "step": 6600, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:53:59.762275", + "step": 6600, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01904921419918537, + "timestamp": "2025-09-10 02:53:59.764723", + "step": 6601, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:53:59.824371", + "step": 6601, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003179634688422084, + "timestamp": "2025-09-10 02:53:59.827315", + "step": 6602, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:53:59.885992", + "step": 6602, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002889272873289883, + "timestamp": "2025-09-10 02:53:59.889921", + "step": 6603, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:53:59.951085", + "step": 6603, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005600472795777023, + "timestamp": "2025-09-10 02:53:59.957700", + "step": 6604, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:00.012179", + "step": 6604, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005887220031581819, + "timestamp": "2025-09-10 02:54:00.015387", + "step": 6605, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:00.072385", + "step": 6605, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037788759800605476, + "timestamp": "2025-09-10 02:54:00.081439", + "step": 6606, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:00.138928", + "step": 6606, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008682940970174968, + "timestamp": "2025-09-10 02:54:00.141920", + "step": 6607, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 13120079713856.0 + }, + "timestamp": "2025-09-10 02:54:00.240132", + "step": 6607, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028730365447700024, + "timestamp": "2025-09-10 02:54:00.259434", + "step": 6608, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:54:00.318832", + "step": 6608, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013414019485935569, + "timestamp": "2025-09-10 02:54:00.326915", + "step": 6609, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:00.383989", + "step": 6609, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.672200131812133e-05, + "timestamp": "2025-09-10 02:54:00.386121", + "step": 6610, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:54:00.454608", + "step": 6610, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005330638960003853, + "timestamp": "2025-09-10 02:54:00.467185", + "step": 6611, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:00.520130", + "step": 6611, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.573120870394632e-05, + "timestamp": "2025-09-10 02:54:00.525847", + "step": 6612, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:00.578308", + "step": 6612, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016775779658928514, + "timestamp": "2025-09-10 02:54:00.581389", + "step": 6613, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:00.635674", + "step": 6613, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.7891633837716654e-05, + "timestamp": "2025-09-10 02:54:00.645346", + "step": 6614, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:54:00.707046", + "step": 6614, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009671809384599328, + "timestamp": "2025-09-10 02:54:00.717966", + "step": 6615, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:54:17.514645", + "step": 6615, + "epoch": 3 + }, + { + "type": "pplx", + "content": 23385001.859677915, + "timestamp": "2025-09-10 02:54:17.517245", + "step": 6615, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:17.571958", + "step": 6615, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028186680283397436, + "timestamp": "2025-09-10 02:54:17.579820", + "step": 6616, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:17.634004", + "step": 6616, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005324442172423005, + "timestamp": "2025-09-10 02:54:17.639042", + "step": 6617, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:54:17.700630", + "step": 6617, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.9863709894707426e-05, + "timestamp": "2025-09-10 02:54:17.711371", + "step": 6618, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:17.767144", + "step": 6618, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036892550997436047, + "timestamp": "2025-09-10 02:54:17.769525", + "step": 6619, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:17.826222", + "step": 6619, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0038833674043416977, + "timestamp": "2025-09-10 02:54:17.832318", + "step": 6620, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:17.888929", + "step": 6620, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.060106872813776e-05, + "timestamp": "2025-09-10 02:54:17.899310", + "step": 6621, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:17.960237", + "step": 6621, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03629033640027046, + "timestamp": "2025-09-10 02:54:17.966411", + "step": 6622, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:54:18.020923", + "step": 6622, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.686465268197935e-05, + "timestamp": "2025-09-10 02:54:18.023565", + "step": 6623, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:18.079257", + "step": 6623, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.211958028259687e-05, + "timestamp": "2025-09-10 02:54:18.085985", + "step": 6624, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:18.139293", + "step": 6624, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.6827079889480956e-05, + "timestamp": "2025-09-10 02:54:18.144760", + "step": 6625, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:18.203213", + "step": 6625, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018668481789063662, + "timestamp": "2025-09-10 02:54:18.213665", + "step": 6626, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:18.267559", + "step": 6626, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.3264459084894042e-05, + "timestamp": "2025-09-10 02:54:18.270255", + "step": 6627, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:18.323337", + "step": 6627, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002850922988727689, + "timestamp": "2025-09-10 02:54:18.329543", + "step": 6628, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:18.382284", + "step": 6628, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019906852685380727, + "timestamp": "2025-09-10 02:54:18.384789", + "step": 6629, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:54:18.445292", + "step": 6629, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008952109492383897, + "timestamp": "2025-09-10 02:54:18.456048", + "step": 6630, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:18.511774", + "step": 6630, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.7878238547127694e-05, + "timestamp": "2025-09-10 02:54:18.514132", + "step": 6631, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:18.567494", + "step": 6631, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.679122164612636e-05, + "timestamp": "2025-09-10 02:54:18.573778", + "step": 6632, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:18.632055", + "step": 6632, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027644942747429013, + "timestamp": "2025-09-10 02:54:18.638007", + "step": 6633, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:18.696287", + "step": 6633, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034692331682890654, + "timestamp": "2025-09-10 02:54:18.698882", + "step": 6634, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:18.753258", + "step": 6634, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.929921876988374e-05, + "timestamp": "2025-09-10 02:54:18.755752", + "step": 6635, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:54:18.818136", + "step": 6635, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.747115210397169e-05, + "timestamp": "2025-09-10 02:54:18.830031", + "step": 6636, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:18.883677", + "step": 6636, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.895226735854521e-05, + "timestamp": "2025-09-10 02:54:18.886067", + "step": 6637, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:18.939694", + "step": 6637, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011700527975335717, + "timestamp": "2025-09-10 02:54:18.941934", + "step": 6638, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:18.996079", + "step": 6638, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.012048394652084e-05, + "timestamp": "2025-09-10 02:54:19.005679", + "step": 6639, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:19.058694", + "step": 6639, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004693043010775, + "timestamp": "2025-09-10 02:54:19.064943", + "step": 6640, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:19.120542", + "step": 6640, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.708974665845744e-05, + "timestamp": "2025-09-10 02:54:19.127521", + "step": 6641, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:19.181188", + "step": 6641, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025819733855314553, + "timestamp": "2025-09-10 02:54:19.183849", + "step": 6642, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:19.237686", + "step": 6642, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.41783985379152e-05, + "timestamp": "2025-09-10 02:54:19.247203", + "step": 6643, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:19.302405", + "step": 6643, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012563510797917843, + "timestamp": "2025-09-10 02:54:19.312945", + "step": 6644, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:19.366179", + "step": 6644, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.19809355965117e-05, + "timestamp": "2025-09-10 02:54:19.372519", + "step": 6645, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:54:19.427087", + "step": 6645, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017791613936424255, + "timestamp": "2025-09-10 02:54:19.434638", + "step": 6646, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:19.488953", + "step": 6646, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.796345016686246e-05, + "timestamp": "2025-09-10 02:54:19.494754", + "step": 6647, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:19.549708", + "step": 6647, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002098836557706818, + "timestamp": "2025-09-10 02:54:19.557949", + "step": 6648, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:19.612383", + "step": 6648, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.07032088935375214, + "timestamp": "2025-09-10 02:54:19.614568", + "step": 6649, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:19.667911", + "step": 6649, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012634944869205356, + "timestamp": "2025-09-10 02:54:19.670796", + "step": 6650, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:19.727055", + "step": 6650, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.4797018340905197e-05, + "timestamp": "2025-09-10 02:54:19.736662", + "step": 6651, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:19.791053", + "step": 6651, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.501683285227045e-05, + "timestamp": "2025-09-10 02:54:19.801374", + "step": 6652, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:19.854278", + "step": 6652, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016405986389145255, + "timestamp": "2025-09-10 02:54:19.859029", + "step": 6653, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:19.914082", + "step": 6653, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006561618647538126, + "timestamp": "2025-09-10 02:54:19.916264", + "step": 6654, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:19.971736", + "step": 6654, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.329329607979162e-06, + "timestamp": "2025-09-10 02:54:19.973995", + "step": 6655, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:20.029901", + "step": 6655, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026703623007051647, + "timestamp": "2025-09-10 02:54:20.036791", + "step": 6656, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:20.091295", + "step": 6656, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.446396921295673e-06, + "timestamp": "2025-09-10 02:54:20.094056", + "step": 6657, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:20.147950", + "step": 6657, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018190269474871457, + "timestamp": "2025-09-10 02:54:20.153574", + "step": 6658, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:20.207517", + "step": 6658, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.149678327143192e-05, + "timestamp": "2025-09-10 02:54:20.209700", + "step": 6659, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:20.263432", + "step": 6659, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034816188272088766, + "timestamp": "2025-09-10 02:54:20.270276", + "step": 6660, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:20.323098", + "step": 6660, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016923309885896742, + "timestamp": "2025-09-10 02:54:20.333057", + "step": 6661, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:20.387889", + "step": 6661, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05583750829100609, + "timestamp": "2025-09-10 02:54:20.390118", + "step": 6662, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:20.443551", + "step": 6662, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.884433681378141e-05, + "timestamp": "2025-09-10 02:54:20.445847", + "step": 6663, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:20.499357", + "step": 6663, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.830570782767609e-05, + "timestamp": "2025-09-10 02:54:20.505692", + "step": 6664, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:20.558413", + "step": 6664, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.02785177459009e-05, + "timestamp": "2025-09-10 02:54:20.560829", + "step": 6665, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:20.615081", + "step": 6665, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006628134869970381, + "timestamp": "2025-09-10 02:54:20.617328", + "step": 6666, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:20.672578", + "step": 6666, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002714922302402556, + "timestamp": "2025-09-10 02:54:20.677676", + "step": 6667, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:20.732577", + "step": 6667, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010888448741752654, + "timestamp": "2025-09-10 02:54:20.739085", + "step": 6668, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:20.793092", + "step": 6668, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011129614431411028, + "timestamp": "2025-09-10 02:54:20.798856", + "step": 6669, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:20.853585", + "step": 6669, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.8284735890338197e-05, + "timestamp": "2025-09-10 02:54:20.857428", + "step": 6670, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:54:20.922145", + "step": 6670, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009904785547405481, + "timestamp": "2025-09-10 02:54:20.933081", + "step": 6671, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:20.998750", + "step": 6671, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010126323904842138, + "timestamp": "2025-09-10 02:54:21.017600", + "step": 6672, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:21.075798", + "step": 6672, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011522303248057142, + "timestamp": "2025-09-10 02:54:21.081551", + "step": 6673, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:21.145979", + "step": 6673, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006222769618034363, + "timestamp": "2025-09-10 02:54:21.155387", + "step": 6674, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:21.215000", + "step": 6674, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.781493665475864e-05, + "timestamp": "2025-09-10 02:54:21.218512", + "step": 6675, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:21.282809", + "step": 6675, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.508735092822462e-05, + "timestamp": "2025-09-10 02:54:21.300500", + "step": 6676, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:21.378282", + "step": 6676, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018511201778892428, + "timestamp": "2025-09-10 02:54:21.388586", + "step": 6677, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:21.458911", + "step": 6677, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032804924994707108, + "timestamp": "2025-09-10 02:54:21.472931", + "step": 6678, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:21.535889", + "step": 6678, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004024840425699949, + "timestamp": "2025-09-10 02:54:21.548459", + "step": 6679, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:21.611605", + "step": 6679, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.6929701814660802e-05, + "timestamp": "2025-09-10 02:54:21.624378", + "step": 6680, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:21.689518", + "step": 6680, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008354577585123479, + "timestamp": "2025-09-10 02:54:21.701972", + "step": 6681, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:21.759701", + "step": 6681, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.089729529572651e-05, + "timestamp": "2025-09-10 02:54:21.774658", + "step": 6682, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:21.836970", + "step": 6682, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003190193383488804, + "timestamp": "2025-09-10 02:54:21.839086", + "step": 6683, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:21.892067", + "step": 6683, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002202236355515197, + "timestamp": "2025-09-10 02:54:21.898347", + "step": 6684, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:21.950857", + "step": 6684, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006472047418355942, + "timestamp": "2025-09-10 02:54:21.953253", + "step": 6685, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:22.007358", + "step": 6685, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014502814156003296, + "timestamp": "2025-09-10 02:54:22.016887", + "step": 6686, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:54:22.071215", + "step": 6686, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.736422801623121e-05, + "timestamp": "2025-09-10 02:54:22.073508", + "step": 6687, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:54:22.135631", + "step": 6687, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.034477561712265015, + "timestamp": "2025-09-10 02:54:22.147485", + "step": 6688, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:22.200777", + "step": 6688, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.9786597476922907e-05, + "timestamp": "2025-09-10 02:54:22.203128", + "step": 6689, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:22.256301", + "step": 6689, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.165489008300938e-05, + "timestamp": "2025-09-10 02:54:22.259129", + "step": 6690, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 8640052517568.0 + }, + "timestamp": "2025-09-10 02:54:22.328220", + "step": 6690, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009426060132682323, + "timestamp": "2025-09-10 02:54:22.340915", + "step": 6691, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:22.394691", + "step": 6691, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031298561953008175, + "timestamp": "2025-09-10 02:54:22.400819", + "step": 6692, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:22.454558", + "step": 6692, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.302199471974745e-05, + "timestamp": "2025-09-10 02:54:22.460965", + "step": 6693, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:22.514212", + "step": 6693, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.824464485864155e-05, + "timestamp": "2025-09-10 02:54:22.517056", + "step": 6694, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:22.570838", + "step": 6694, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016505582025274634, + "timestamp": "2025-09-10 02:54:22.573222", + "step": 6695, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:22.626707", + "step": 6695, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028228361043147743, + "timestamp": "2025-09-10 02:54:22.632817", + "step": 6696, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 8960054460160.0 + }, + "timestamp": "2025-09-10 02:54:22.702093", + "step": 6696, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005732628633268178, + "timestamp": "2025-09-10 02:54:22.716098", + "step": 6697, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:22.769939", + "step": 6697, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011515268124639988, + "timestamp": "2025-09-10 02:54:22.771954", + "step": 6698, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:22.825416", + "step": 6698, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014832020679023117, + "timestamp": "2025-09-10 02:54:22.827720", + "step": 6699, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:22.880848", + "step": 6699, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014225866470951587, + "timestamp": "2025-09-10 02:54:22.886804", + "step": 6700, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:22.939354", + "step": 6700, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028337922412902117, + "timestamp": "2025-09-10 02:54:22.941673", + "step": 6701, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:22.994523", + "step": 6701, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041280841105617583, + "timestamp": "2025-09-10 02:54:22.996547", + "step": 6702, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:23.049596", + "step": 6702, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.0511145673226565e-05, + "timestamp": "2025-09-10 02:54:23.051659", + "step": 6703, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:23.104571", + "step": 6703, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044902818626724184, + "timestamp": "2025-09-10 02:54:23.111642", + "step": 6704, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:23.163809", + "step": 6704, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003448054485488683, + "timestamp": "2025-09-10 02:54:23.165820", + "step": 6705, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:23.220058", + "step": 6705, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020704987400677055, + "timestamp": "2025-09-10 02:54:23.229861", + "step": 6706, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:23.283371", + "step": 6706, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031007863581180573, + "timestamp": "2025-09-10 02:54:23.285568", + "step": 6707, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:23.338640", + "step": 6707, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010566863784333691, + "timestamp": "2025-09-10 02:54:23.344726", + "step": 6708, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:23.397390", + "step": 6708, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044948840513825417, + "timestamp": "2025-09-10 02:54:23.399488", + "step": 6709, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:23.452180", + "step": 6709, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018663638911675662, + "timestamp": "2025-09-10 02:54:23.454475", + "step": 6710, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:23.507460", + "step": 6710, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022228548186831176, + "timestamp": "2025-09-10 02:54:23.510410", + "step": 6711, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:23.563611", + "step": 6711, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004220163100399077, + "timestamp": "2025-09-10 02:54:23.570796", + "step": 6712, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:23.623249", + "step": 6712, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015388162864837795, + "timestamp": "2025-09-10 02:54:23.625434", + "step": 6713, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:23.680377", + "step": 6713, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002114841714501381, + "timestamp": "2025-09-10 02:54:23.689988", + "step": 6714, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:23.744954", + "step": 6714, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003809299087151885, + "timestamp": "2025-09-10 02:54:23.754734", + "step": 6715, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:23.808207", + "step": 6715, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018899877613876015, + "timestamp": "2025-09-10 02:54:23.814363", + "step": 6716, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:23.866805", + "step": 6716, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011735771840903908, + "timestamp": "2025-09-10 02:54:23.869107", + "step": 6717, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:23.922364", + "step": 6717, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003337863367050886, + "timestamp": "2025-09-10 02:54:23.924628", + "step": 6718, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:23.978673", + "step": 6718, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002060003113001585, + "timestamp": "2025-09-10 02:54:23.988474", + "step": 6719, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:24.041671", + "step": 6719, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036181341856718063, + "timestamp": "2025-09-10 02:54:24.047814", + "step": 6720, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:24.101959", + "step": 6720, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008899550884962082, + "timestamp": "2025-09-10 02:54:24.103951", + "step": 6721, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:54:24.157114", + "step": 6721, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014222814934328198, + "timestamp": "2025-09-10 02:54:24.165016", + "step": 6722, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:24.218578", + "step": 6722, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038225733442232013, + "timestamp": "2025-09-10 02:54:24.224815", + "step": 6723, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:24.278129", + "step": 6723, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00885496474802494, + "timestamp": "2025-09-10 02:54:24.284369", + "step": 6724, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:54:24.337096", + "step": 6724, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002544673625379801, + "timestamp": "2025-09-10 02:54:24.339314", + "step": 6725, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:24.393043", + "step": 6725, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023553676437586546, + "timestamp": "2025-09-10 02:54:24.395330", + "step": 6726, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:24.448916", + "step": 6726, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001859004405559972, + "timestamp": "2025-09-10 02:54:24.451110", + "step": 6727, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:24.505588", + "step": 6727, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01435794122517109, + "timestamp": "2025-09-10 02:54:24.511726", + "step": 6728, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:24.566535", + "step": 6728, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.058423216221854e-05, + "timestamp": "2025-09-10 02:54:24.568981", + "step": 6729, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:24.623564", + "step": 6729, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.51773690758273e-05, + "timestamp": "2025-09-10 02:54:24.626097", + "step": 6730, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:24.679341", + "step": 6730, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003350157930981368, + "timestamp": "2025-09-10 02:54:24.681683", + "step": 6731, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:24.735359", + "step": 6731, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020194535318296403, + "timestamp": "2025-09-10 02:54:24.741560", + "step": 6732, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:24.794207", + "step": 6732, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005801634979434311, + "timestamp": "2025-09-10 02:54:24.796273", + "step": 6733, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:24.849054", + "step": 6733, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.747808558633551e-05, + "timestamp": "2025-09-10 02:54:24.851383", + "step": 6734, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:54:24.907099", + "step": 6734, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017583959270268679, + "timestamp": "2025-09-10 02:54:24.914988", + "step": 6735, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:54:24.976372", + "step": 6735, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021081813611090183, + "timestamp": "2025-09-10 02:54:24.987959", + "step": 6736, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:54:25.048637", + "step": 6736, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007808614755049348, + "timestamp": "2025-09-10 02:54:25.060164", + "step": 6737, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:25.113896", + "step": 6737, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.8432010620017536e-05, + "timestamp": "2025-09-10 02:54:25.116073", + "step": 6738, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:54:25.176039", + "step": 6738, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.287031283136457e-05, + "timestamp": "2025-09-10 02:54:25.186742", + "step": 6739, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:25.240629", + "step": 6739, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016147976741194725, + "timestamp": "2025-09-10 02:54:25.246712", + "step": 6740, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:25.299975", + "step": 6740, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.1653467380674556e-05, + "timestamp": "2025-09-10 02:54:25.302338", + "step": 6741, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:25.356087", + "step": 6741, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005793520831502974, + "timestamp": "2025-09-10 02:54:25.358229", + "step": 6742, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:25.411891", + "step": 6742, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003996819432359189, + "timestamp": "2025-09-10 02:54:25.414140", + "step": 6743, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:25.467442", + "step": 6743, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021318215876817703, + "timestamp": "2025-09-10 02:54:25.473966", + "step": 6744, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:25.526873", + "step": 6744, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040453224210068583, + "timestamp": "2025-09-10 02:54:25.529673", + "step": 6745, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:25.587648", + "step": 6745, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003764012362807989, + "timestamp": "2025-09-10 02:54:25.598072", + "step": 6746, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:25.652081", + "step": 6746, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010460015619173646, + "timestamp": "2025-09-10 02:54:25.654772", + "step": 6747, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:25.709953", + "step": 6747, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012851936044171453, + "timestamp": "2025-09-10 02:54:25.720491", + "step": 6748, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:25.774641", + "step": 6748, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038896952173672616, + "timestamp": "2025-09-10 02:54:25.777007", + "step": 6749, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:54:25.857007", + "step": 6749, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012903818860650063, + "timestamp": "2025-09-10 02:54:25.872009", + "step": 6750, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:25.927395", + "step": 6750, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016401773609686643, + "timestamp": "2025-09-10 02:54:25.929523", + "step": 6751, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:25.987942", + "step": 6751, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010856654989765957, + "timestamp": "2025-09-10 02:54:25.999143", + "step": 6752, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:26.052313", + "step": 6752, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001499974459875375, + "timestamp": "2025-09-10 02:54:26.054663", + "step": 6753, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:26.109075", + "step": 6753, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016375941049773246, + "timestamp": "2025-09-10 02:54:26.111442", + "step": 6754, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:26.165537", + "step": 6754, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024932180531322956, + "timestamp": "2025-09-10 02:54:26.167944", + "step": 6755, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:54:26.230016", + "step": 6755, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013631509500555694, + "timestamp": "2025-09-10 02:54:26.241686", + "step": 6756, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:26.295622", + "step": 6756, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.498567068367265e-05, + "timestamp": "2025-09-10 02:54:26.297958", + "step": 6757, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:54:26.366262", + "step": 6757, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00039396085776388645, + "timestamp": "2025-09-10 02:54:26.378771", + "step": 6758, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:26.437901", + "step": 6758, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010092677985085174, + "timestamp": "2025-09-10 02:54:26.448267", + "step": 6759, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:26.503708", + "step": 6759, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.924220072804019e-05, + "timestamp": "2025-09-10 02:54:26.510153", + "step": 6760, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:26.563193", + "step": 6760, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006220812443643808, + "timestamp": "2025-09-10 02:54:26.565276", + "step": 6761, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:26.618786", + "step": 6761, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005389533471316099, + "timestamp": "2025-09-10 02:54:26.625123", + "step": 6762, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:54:43.457599", + "step": 6762, + "epoch": 3 + }, + { + "type": "pplx", + "content": 25709489.340655327, + "timestamp": "2025-09-10 02:54:43.460339", + "step": 6762, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:43.514618", + "step": 6762, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024434844963252544, + "timestamp": "2025-09-10 02:54:43.516606", + "step": 6763, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:43.570014", + "step": 6763, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022615003399550915, + "timestamp": "2025-09-10 02:54:43.575873", + "step": 6764, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:43.632173", + "step": 6764, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029734117561019957, + "timestamp": "2025-09-10 02:54:43.643397", + "step": 6765, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:43.704713", + "step": 6765, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020704036578536034, + "timestamp": "2025-09-10 02:54:43.719871", + "step": 6766, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:43.775942", + "step": 6766, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.4070613916555885e-06, + "timestamp": "2025-09-10 02:54:43.782902", + "step": 6767, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:54:43.850891", + "step": 6767, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003174375160597265, + "timestamp": "2025-09-10 02:54:43.862141", + "step": 6768, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:43.921109", + "step": 6768, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021674581803381443, + "timestamp": "2025-09-10 02:54:43.932634", + "step": 6769, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:43.991669", + "step": 6769, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004266251809895039, + "timestamp": "2025-09-10 02:54:43.997338", + "step": 6770, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:44.056415", + "step": 6770, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001336360292043537, + "timestamp": "2025-09-10 02:54:44.066066", + "step": 6771, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:44.127146", + "step": 6771, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016490685811731964, + "timestamp": "2025-09-10 02:54:44.137474", + "step": 6772, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:44.209500", + "step": 6772, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0033168368972837925, + "timestamp": "2025-09-10 02:54:44.233944", + "step": 6773, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:44.311417", + "step": 6773, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0043604555539786816, + "timestamp": "2025-09-10 02:54:44.323862", + "step": 6774, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:44.397438", + "step": 6774, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.357293361565098e-05, + "timestamp": "2025-09-10 02:54:44.407080", + "step": 6775, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:44.468995", + "step": 6775, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.862401642138138e-05, + "timestamp": "2025-09-10 02:54:44.476295", + "step": 6776, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:44.544292", + "step": 6776, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001584481360623613, + "timestamp": "2025-09-10 02:54:44.551309", + "step": 6777, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:44.606774", + "step": 6777, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006461879820562899, + "timestamp": "2025-09-10 02:54:44.609148", + "step": 6778, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:44.662048", + "step": 6778, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011099242838099599, + "timestamp": "2025-09-10 02:54:44.664165", + "step": 6779, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:44.718045", + "step": 6779, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007155724451877177, + "timestamp": "2025-09-10 02:54:44.724151", + "step": 6780, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:44.776648", + "step": 6780, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.060069210827350616, + "timestamp": "2025-09-10 02:54:44.786951", + "step": 6781, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:44.840157", + "step": 6781, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.3121788621647283e-05, + "timestamp": "2025-09-10 02:54:44.849745", + "step": 6782, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:44.903540", + "step": 6782, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012703310931101441, + "timestamp": "2025-09-10 02:54:44.906073", + "step": 6783, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:54:44.959318", + "step": 6783, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.4759003281360492e-05, + "timestamp": "2025-09-10 02:54:44.968426", + "step": 6784, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:45.020785", + "step": 6784, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.132286656182259e-05, + "timestamp": "2025-09-10 02:54:45.023260", + "step": 6785, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:45.076094", + "step": 6785, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002933169307652861, + "timestamp": "2025-09-10 02:54:45.078193", + "step": 6786, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:45.131430", + "step": 6786, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005814902950078249, + "timestamp": "2025-09-10 02:54:45.133592", + "step": 6787, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:45.187016", + "step": 6787, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.8819564502337016e-05, + "timestamp": "2025-09-10 02:54:45.197409", + "step": 6788, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:54:45.256777", + "step": 6788, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004507180710788816, + "timestamp": "2025-09-10 02:54:45.268370", + "step": 6789, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:54:45.320792", + "step": 6789, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028855472919531167, + "timestamp": "2025-09-10 02:54:45.323134", + "step": 6790, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:54:45.376029", + "step": 6790, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.1611181435000617e-05, + "timestamp": "2025-09-10 02:54:45.378054", + "step": 6791, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:45.430429", + "step": 6791, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.1199240513378754e-05, + "timestamp": "2025-09-10 02:54:45.436276", + "step": 6792, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:45.487944", + "step": 6792, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010968972492264584, + "timestamp": "2025-09-10 02:54:45.491053", + "step": 6793, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 10240062230528.0 + }, + "timestamp": "2025-09-10 02:54:45.566858", + "step": 6793, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.6435873476439156e-05, + "timestamp": "2025-09-10 02:54:45.580946", + "step": 6794, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:45.633300", + "step": 6794, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002485661068931222, + "timestamp": "2025-09-10 02:54:45.636608", + "step": 6795, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:45.689316", + "step": 6795, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03919482231140137, + "timestamp": "2025-09-10 02:54:45.694893", + "step": 6796, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:45.746777", + "step": 6796, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.036987531930208206, + "timestamp": "2025-09-10 02:54:45.756993", + "step": 6797, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:45.810314", + "step": 6797, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004275764338672161, + "timestamp": "2025-09-10 02:54:45.812720", + "step": 6798, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:54:45.878715", + "step": 6798, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.209383329609409e-05, + "timestamp": "2025-09-10 02:54:45.890913", + "step": 6799, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:45.944968", + "step": 6799, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.097974280943163e-05, + "timestamp": "2025-09-10 02:54:45.950844", + "step": 6800, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:46.004181", + "step": 6800, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.338800110621378e-05, + "timestamp": "2025-09-10 02:54:46.006518", + "step": 6801, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:54:46.067939", + "step": 6801, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010365008347434923, + "timestamp": "2025-09-10 02:54:46.079051", + "step": 6802, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:46.131652", + "step": 6802, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05696597322821617, + "timestamp": "2025-09-10 02:54:46.134011", + "step": 6803, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:46.188195", + "step": 6803, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016694695223122835, + "timestamp": "2025-09-10 02:54:46.198757", + "step": 6804, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:54:46.263548", + "step": 6804, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.597422518301755e-05, + "timestamp": "2025-09-10 02:54:46.276786", + "step": 6805, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:46.329367", + "step": 6805, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008031840436160564, + "timestamp": "2025-09-10 02:54:46.331832", + "step": 6806, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:54:46.398241", + "step": 6806, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0027756125200539827, + "timestamp": "2025-09-10 02:54:46.410488", + "step": 6807, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:46.463555", + "step": 6807, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013087447732686996, + "timestamp": "2025-09-10 02:54:46.469217", + "step": 6808, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:54:46.521582", + "step": 6808, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017276279628276825, + "timestamp": "2025-09-10 02:54:46.529925", + "step": 6809, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:46.582965", + "step": 6809, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000820252753328532, + "timestamp": "2025-09-10 02:54:46.585063", + "step": 6810, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:46.637940", + "step": 6810, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012021789734717458, + "timestamp": "2025-09-10 02:54:46.644396", + "step": 6811, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:46.697369", + "step": 6811, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021653142175637186, + "timestamp": "2025-09-10 02:54:46.703327", + "step": 6812, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:46.755527", + "step": 6812, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.710442822717596e-05, + "timestamp": "2025-09-10 02:54:46.765781", + "step": 6813, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:54:46.819110", + "step": 6813, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005414400948211551, + "timestamp": "2025-09-10 02:54:46.827337", + "step": 6814, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:46.879965", + "step": 6814, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010980067600030452, + "timestamp": "2025-09-10 02:54:46.882124", + "step": 6815, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:46.934468", + "step": 6815, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.160889410646632e-05, + "timestamp": "2025-09-10 02:54:46.940302", + "step": 6816, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:46.992671", + "step": 6816, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015756313223391771, + "timestamp": "2025-09-10 02:54:46.994834", + "step": 6817, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:54:47.047557", + "step": 6817, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00150921696331352, + "timestamp": "2025-09-10 02:54:47.055856", + "step": 6818, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:47.113447", + "step": 6818, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003578981850296259, + "timestamp": "2025-09-10 02:54:47.123845", + "step": 6819, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:47.176978", + "step": 6819, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007674887776374817, + "timestamp": "2025-09-10 02:54:47.182779", + "step": 6820, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:47.239365", + "step": 6820, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012018119014101103, + "timestamp": "2025-09-10 02:54:47.250594", + "step": 6821, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:47.303359", + "step": 6821, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006332649500109255, + "timestamp": "2025-09-10 02:54:47.305445", + "step": 6822, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:47.357966", + "step": 6822, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015875112148933113, + "timestamp": "2025-09-10 02:54:47.364368", + "step": 6823, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:54:47.417096", + "step": 6823, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028945859521627426, + "timestamp": "2025-09-10 02:54:47.425942", + "step": 6824, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:47.482476", + "step": 6824, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010640511027304456, + "timestamp": "2025-09-10 02:54:47.493672", + "step": 6825, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:47.546368", + "step": 6825, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002750347484834492, + "timestamp": "2025-09-10 02:54:47.552871", + "step": 6826, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:47.610655", + "step": 6826, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017221200978383422, + "timestamp": "2025-09-10 02:54:47.613447", + "step": 6827, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:47.667468", + "step": 6827, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025323693989776075, + "timestamp": "2025-09-10 02:54:47.673506", + "step": 6828, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:47.726256", + "step": 6828, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016511676367372274, + "timestamp": "2025-09-10 02:54:47.728336", + "step": 6829, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:47.782164", + "step": 6829, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004770257510244846, + "timestamp": "2025-09-10 02:54:47.784259", + "step": 6830, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:47.837576", + "step": 6830, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022807701316196471, + "timestamp": "2025-09-10 02:54:47.839917", + "step": 6831, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:47.892651", + "step": 6831, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015882852021604776, + "timestamp": "2025-09-10 02:54:47.898252", + "step": 6832, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:54:47.950227", + "step": 6832, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011089268373325467, + "timestamp": "2025-09-10 02:54:47.952495", + "step": 6833, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:48.005620", + "step": 6833, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01070477720350027, + "timestamp": "2025-09-10 02:54:48.015218", + "step": 6834, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:48.068058", + "step": 6834, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003968815319240093, + "timestamp": "2025-09-10 02:54:48.070244", + "step": 6835, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:48.123523", + "step": 6835, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012771779438480735, + "timestamp": "2025-09-10 02:54:48.129402", + "step": 6836, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:48.184171", + "step": 6836, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029552009073086083, + "timestamp": "2025-09-10 02:54:48.190900", + "step": 6837, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:48.243804", + "step": 6837, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011362083023414016, + "timestamp": "2025-09-10 02:54:48.246168", + "step": 6838, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:54:48.299423", + "step": 6838, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.024538684636354446, + "timestamp": "2025-09-10 02:54:48.307626", + "step": 6839, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:48.360714", + "step": 6839, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003189076960552484, + "timestamp": "2025-09-10 02:54:48.366478", + "step": 6840, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:48.419664", + "step": 6840, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022763405286241323, + "timestamp": "2025-09-10 02:54:48.422024", + "step": 6841, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:48.476781", + "step": 6841, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.701670256210491e-05, + "timestamp": "2025-09-10 02:54:48.483004", + "step": 6842, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:48.535962", + "step": 6842, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043326723971404135, + "timestamp": "2025-09-10 02:54:48.538495", + "step": 6843, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:48.591310", + "step": 6843, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008735000155866146, + "timestamp": "2025-09-10 02:54:48.597296", + "step": 6844, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:48.649920", + "step": 6844, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.3418920035474e-05, + "timestamp": "2025-09-10 02:54:48.652075", + "step": 6845, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:48.704780", + "step": 6845, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009570607915520668, + "timestamp": "2025-09-10 02:54:48.711268", + "step": 6846, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:48.765939", + "step": 6846, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020284978381823748, + "timestamp": "2025-09-10 02:54:48.775752", + "step": 6847, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:48.828549", + "step": 6847, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.389665501657873e-05, + "timestamp": "2025-09-10 02:54:48.834622", + "step": 6848, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:48.888460", + "step": 6848, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005315937451086938, + "timestamp": "2025-09-10 02:54:48.897550", + "step": 6849, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:54:48.964068", + "step": 6849, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032117923256009817, + "timestamp": "2025-09-10 02:54:48.976314", + "step": 6850, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:54:49.037264", + "step": 6850, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00123793154489249, + "timestamp": "2025-09-10 02:54:49.048181", + "step": 6851, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:49.101799", + "step": 6851, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010277210967615247, + "timestamp": "2025-09-10 02:54:49.107839", + "step": 6852, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:49.164667", + "step": 6852, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001223025843501091, + "timestamp": "2025-09-10 02:54:49.175882", + "step": 6853, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:49.229444", + "step": 6853, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003343747928738594, + "timestamp": "2025-09-10 02:54:49.231884", + "step": 6854, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:49.284400", + "step": 6854, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002088952751364559, + "timestamp": "2025-09-10 02:54:49.286834", + "step": 6855, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:49.339605", + "step": 6855, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011284074280411005, + "timestamp": "2025-09-10 02:54:49.346951", + "step": 6856, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:49.399147", + "step": 6856, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004039173945784569, + "timestamp": "2025-09-10 02:54:49.405935", + "step": 6857, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:49.464041", + "step": 6857, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048438538215123117, + "timestamp": "2025-09-10 02:54:49.474487", + "step": 6858, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:49.527583", + "step": 6858, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042181755998171866, + "timestamp": "2025-09-10 02:54:49.529880", + "step": 6859, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:49.584969", + "step": 6859, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008075033547356725, + "timestamp": "2025-09-10 02:54:49.595546", + "step": 6860, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:49.648093", + "step": 6860, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011093399953097105, + "timestamp": "2025-09-10 02:54:49.650342", + "step": 6861, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:49.703361", + "step": 6861, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029837351758033037, + "timestamp": "2025-09-10 02:54:49.705555", + "step": 6862, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:49.758221", + "step": 6862, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002768567646853626, + "timestamp": "2025-09-10 02:54:49.760456", + "step": 6863, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:54:49.812989", + "step": 6863, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000653381459414959, + "timestamp": "2025-09-10 02:54:49.818571", + "step": 6864, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:54:49.878019", + "step": 6864, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002829869044944644, + "timestamp": "2025-09-10 02:54:49.890052", + "step": 6865, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:49.943305", + "step": 6865, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003227445122320205, + "timestamp": "2025-09-10 02:54:49.945571", + "step": 6866, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:49.999926", + "step": 6866, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001285174279473722, + "timestamp": "2025-09-10 02:54:50.009710", + "step": 6867, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:50.062418", + "step": 6867, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003444342641159892, + "timestamp": "2025-09-10 02:54:50.068115", + "step": 6868, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:50.120463", + "step": 6868, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028381331358104944, + "timestamp": "2025-09-10 02:54:50.122707", + "step": 6869, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:50.175218", + "step": 6869, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005986435571685433, + "timestamp": "2025-09-10 02:54:50.177684", + "step": 6870, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:54:50.238692", + "step": 6870, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004264956805855036, + "timestamp": "2025-09-10 02:54:50.249594", + "step": 6871, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:50.302104", + "step": 6871, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006066313362680376, + "timestamp": "2025-09-10 02:54:50.307805", + "step": 6872, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:50.359643", + "step": 6872, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0038845862727612257, + "timestamp": "2025-09-10 02:54:50.361881", + "step": 6873, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:50.414169", + "step": 6873, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00452636880800128, + "timestamp": "2025-09-10 02:54:50.416471", + "step": 6874, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:54:50.470558", + "step": 6874, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002486585173755884, + "timestamp": "2025-09-10 02:54:50.480150", + "step": 6875, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:50.532924", + "step": 6875, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007920735515654087, + "timestamp": "2025-09-10 02:54:50.538714", + "step": 6876, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:50.591224", + "step": 6876, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034601124934852123, + "timestamp": "2025-09-10 02:54:50.593364", + "step": 6877, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:54:50.654075", + "step": 6877, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003639400820247829, + "timestamp": "2025-09-10 02:54:50.664927", + "step": 6878, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:50.717906", + "step": 6878, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006268096040003002, + "timestamp": "2025-09-10 02:54:50.720773", + "step": 6879, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:50.773184", + "step": 6879, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022460625041276217, + "timestamp": "2025-09-10 02:54:50.780479", + "step": 6880, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:50.832593", + "step": 6880, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001877809118013829, + "timestamp": "2025-09-10 02:54:50.839148", + "step": 6881, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:50.892255", + "step": 6881, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.215891021885909e-05, + "timestamp": "2025-09-10 02:54:50.898785", + "step": 6882, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:50.951255", + "step": 6882, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.2957565559190698e-05, + "timestamp": "2025-09-10 02:54:50.953388", + "step": 6883, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:51.006321", + "step": 6883, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004247861448675394, + "timestamp": "2025-09-10 02:54:51.012096", + "step": 6884, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:51.064589", + "step": 6884, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009423012961633503, + "timestamp": "2025-09-10 02:54:51.071130", + "step": 6885, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:51.124102", + "step": 6885, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0381709448993206, + "timestamp": "2025-09-10 02:54:51.126314", + "step": 6886, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:51.179485", + "step": 6886, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006023383466526866, + "timestamp": "2025-09-10 02:54:51.181598", + "step": 6887, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:51.234944", + "step": 6887, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022470338735729456, + "timestamp": "2025-09-10 02:54:51.240675", + "step": 6888, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:54:51.293439", + "step": 6888, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.219224789878353e-05, + "timestamp": "2025-09-10 02:54:51.301480", + "step": 6889, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:51.355968", + "step": 6889, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015506960335187614, + "timestamp": "2025-09-10 02:54:51.365754", + "step": 6890, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:51.419019", + "step": 6890, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007731267251074314, + "timestamp": "2025-09-10 02:54:51.421198", + "step": 6891, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:51.474004", + "step": 6891, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004507832054514438, + "timestamp": "2025-09-10 02:54:51.479702", + "step": 6892, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:51.531784", + "step": 6892, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043676866334863007, + "timestamp": "2025-09-10 02:54:51.533889", + "step": 6893, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:54:51.591693", + "step": 6893, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028911096160300076, + "timestamp": "2025-09-10 02:54:51.602060", + "step": 6894, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:51.655361", + "step": 6894, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006486875936388969, + "timestamp": "2025-09-10 02:54:51.657857", + "step": 6895, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:51.710680", + "step": 6895, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047094575711525977, + "timestamp": "2025-09-10 02:54:51.716484", + "step": 6896, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:51.768601", + "step": 6896, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013813263736665249, + "timestamp": "2025-09-10 02:54:51.770791", + "step": 6897, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:54:51.823594", + "step": 6897, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023737193550914526, + "timestamp": "2025-09-10 02:54:51.830137", + "step": 6898, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:54:51.882663", + "step": 6898, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007876521558500826, + "timestamp": "2025-09-10 02:54:51.884928", + "step": 6899, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:51.937310", + "step": 6899, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002053142961813137, + "timestamp": "2025-09-10 02:54:51.943120", + "step": 6900, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:54:52.002536", + "step": 6900, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008010483579710126, + "timestamp": "2025-09-10 02:54:52.014315", + "step": 6901, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:54:52.067103", + "step": 6901, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018803616694640368, + "timestamp": "2025-09-10 02:54:52.069257", + "step": 6902, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:54:52.121903", + "step": 6902, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.800043335417286e-05, + "timestamp": "2025-09-10 02:54:52.124025", + "step": 6903, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:54:52.176378", + "step": 6903, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004912449512630701, + "timestamp": "2025-09-10 02:54:52.181933", + "step": 6904, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:54:52.252522", + "step": 6904, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006704013212583959, + "timestamp": "2025-09-10 02:54:52.267166", + "step": 6905, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:54:52.319700", + "step": 6905, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028490772820077837, + "timestamp": "2025-09-10 02:54:52.321853", + "step": 6906, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:54:52.374454", + "step": 6906, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011347966938046739, + "timestamp": "2025-09-10 02:54:52.376518", + "step": 6907, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:54:52.431023", + "step": 6907, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026673488900996745, + "timestamp": "2025-09-10 02:54:52.441604", + "step": 6908, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:54:52.494169", + "step": 6908, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011277222074568272, + "timestamp": "2025-09-10 02:54:52.496318", + "step": 6909, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:55:09.333076", + "step": 6909, + "epoch": 3 + }, + { + "type": "pplx", + "content": 24073172.905026663, + "timestamp": "2025-09-10 02:55:09.335891", + "step": 6909, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:55:09.390325", + "step": 6909, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004598031286150217, + "timestamp": "2025-09-10 02:55:09.396153", + "step": 6910, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:55:09.450747", + "step": 6910, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.856903226231225e-05, + "timestamp": "2025-09-10 02:55:09.453038", + "step": 6911, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:55:09.506875", + "step": 6911, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.248633432434872e-05, + "timestamp": "2025-09-10 02:55:09.513364", + "step": 6912, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:55:09.565957", + "step": 6912, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011987597827101126, + "timestamp": "2025-09-10 02:55:09.568207", + "step": 6913, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:09.621891", + "step": 6913, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017569736810401082, + "timestamp": "2025-09-10 02:55:09.624152", + "step": 6914, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:09.677477", + "step": 6914, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.841868551215157e-05, + "timestamp": "2025-09-10 02:55:09.680091", + "step": 6915, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:09.733137", + "step": 6915, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.9759321730816737e-05, + "timestamp": "2025-09-10 02:55:09.739007", + "step": 6916, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:09.791683", + "step": 6916, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004630325827747583, + "timestamp": "2025-09-10 02:55:09.794073", + "step": 6917, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:55:09.847491", + "step": 6917, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015202411450445652, + "timestamp": "2025-09-10 02:55:09.849559", + "step": 6918, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:09.902541", + "step": 6918, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.5344645362347364e-05, + "timestamp": "2025-09-10 02:55:09.904888", + "step": 6919, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:55:09.958603", + "step": 6919, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002283619687659666, + "timestamp": "2025-09-10 02:55:09.969006", + "step": 6920, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:10.021408", + "step": 6920, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020906592544633895, + "timestamp": "2025-09-10 02:55:10.023685", + "step": 6921, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:10.077503", + "step": 6921, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047006976092234254, + "timestamp": "2025-09-10 02:55:10.083502", + "step": 6922, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:10.137343", + "step": 6922, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.800849511520937e-05, + "timestamp": "2025-09-10 02:55:10.139832", + "step": 6923, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:10.193459", + "step": 6923, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005407427088357508, + "timestamp": "2025-09-10 02:55:10.199521", + "step": 6924, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:10.251994", + "step": 6924, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.559710840927437e-05, + "timestamp": "2025-09-10 02:55:10.254974", + "step": 6925, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:10.307892", + "step": 6925, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008459268137812614, + "timestamp": "2025-09-10 02:55:10.310318", + "step": 6926, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:10.364136", + "step": 6926, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002495987864676863, + "timestamp": "2025-09-10 02:55:10.366150", + "step": 6927, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:10.419306", + "step": 6927, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.783969289856032e-05, + "timestamp": "2025-09-10 02:55:10.425002", + "step": 6928, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:55:10.477626", + "step": 6928, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005323676159605384, + "timestamp": "2025-09-10 02:55:10.485607", + "step": 6929, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:10.539235", + "step": 6929, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002948866691440344, + "timestamp": "2025-09-10 02:55:10.542061", + "step": 6930, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:10.595925", + "step": 6930, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009427659213542938, + "timestamp": "2025-09-10 02:55:10.598456", + "step": 6931, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:10.652790", + "step": 6931, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004973894683644176, + "timestamp": "2025-09-10 02:55:10.658907", + "step": 6932, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:10.711998", + "step": 6932, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011988679878413677, + "timestamp": "2025-09-10 02:55:10.714246", + "step": 6933, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:55:10.767798", + "step": 6933, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034859830047935247, + "timestamp": "2025-09-10 02:55:10.777388", + "step": 6934, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:10.830896", + "step": 6934, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009490315802395344, + "timestamp": "2025-09-10 02:55:10.832960", + "step": 6935, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:10.885797", + "step": 6935, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011611237423494458, + "timestamp": "2025-09-10 02:55:10.891645", + "step": 6936, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:10.943960", + "step": 6936, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.195281407097355e-05, + "timestamp": "2025-09-10 02:55:10.945923", + "step": 6937, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:10.998381", + "step": 6937, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026475408230908215, + "timestamp": "2025-09-10 02:55:11.004942", + "step": 6938, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:11.058348", + "step": 6938, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.223136683227494e-05, + "timestamp": "2025-09-10 02:55:11.064673", + "step": 6939, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:55:11.118326", + "step": 6939, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000286032009171322, + "timestamp": "2025-09-10 02:55:11.128699", + "step": 6940, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:11.181675", + "step": 6940, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009600855992175639, + "timestamp": "2025-09-10 02:55:11.184032", + "step": 6941, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:11.237713", + "step": 6941, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.1028057744842954e-05, + "timestamp": "2025-09-10 02:55:11.239833", + "step": 6942, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:55:11.292249", + "step": 6942, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015771265316288918, + "timestamp": "2025-09-10 02:55:11.294532", + "step": 6943, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:11.347407", + "step": 6943, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014096121594775468, + "timestamp": "2025-09-10 02:55:11.353340", + "step": 6944, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:55:11.425672", + "step": 6944, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.033413439989089966, + "timestamp": "2025-09-10 02:55:11.440594", + "step": 6945, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:11.494746", + "step": 6945, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015498296124860644, + "timestamp": "2025-09-10 02:55:11.500522", + "step": 6946, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:11.553958", + "step": 6946, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028644457925111055, + "timestamp": "2025-09-10 02:55:11.556744", + "step": 6947, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:11.610072", + "step": 6947, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.332937558297999e-05, + "timestamp": "2025-09-10 02:55:11.616275", + "step": 6948, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:55:11.668796", + "step": 6948, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024751699529588223, + "timestamp": "2025-09-10 02:55:11.671287", + "step": 6949, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:55:11.739622", + "step": 6949, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002412964531686157, + "timestamp": "2025-09-10 02:55:11.752229", + "step": 6950, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:11.805109", + "step": 6950, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.8502173993038014e-05, + "timestamp": "2025-09-10 02:55:11.807342", + "step": 6951, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:11.860416", + "step": 6951, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013299663260113448, + "timestamp": "2025-09-10 02:55:11.866285", + "step": 6952, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:11.918736", + "step": 6952, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008007865981198847, + "timestamp": "2025-09-10 02:55:11.921244", + "step": 6953, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:55:11.975215", + "step": 6953, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.039803050342016e-05, + "timestamp": "2025-09-10 02:55:11.982742", + "step": 6954, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:12.037426", + "step": 6954, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004602761473506689, + "timestamp": "2025-09-10 02:55:12.039789", + "step": 6955, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:55:12.098261", + "step": 6955, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004310185613576323, + "timestamp": "2025-09-10 02:55:12.109467", + "step": 6956, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:55:12.162588", + "step": 6956, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005257864482700825, + "timestamp": "2025-09-10 02:55:12.164562", + "step": 6957, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:12.217449", + "step": 6957, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000653850962407887, + "timestamp": "2025-09-10 02:55:12.219675", + "step": 6958, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:12.272949", + "step": 6958, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.644639067118987e-05, + "timestamp": "2025-09-10 02:55:12.275328", + "step": 6959, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 9920060287936.0 + }, + "timestamp": "2025-09-10 02:55:12.350005", + "step": 6959, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005518809193745255, + "timestamp": "2025-09-10 02:55:12.364712", + "step": 6960, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:12.417817", + "step": 6960, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004413666611071676, + "timestamp": "2025-09-10 02:55:12.424069", + "step": 6961, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:12.478290", + "step": 6961, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008641178137622774, + "timestamp": "2025-09-10 02:55:12.484593", + "step": 6962, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:12.538597", + "step": 6962, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025807858910411596, + "timestamp": "2025-09-10 02:55:12.540964", + "step": 6963, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:55:12.594820", + "step": 6963, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.628083919058554e-05, + "timestamp": "2025-09-10 02:55:12.601016", + "step": 6964, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:12.653912", + "step": 6964, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009217743645422161, + "timestamp": "2025-09-10 02:55:12.656604", + "step": 6965, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 8320050574976.0 + }, + "timestamp": "2025-09-10 02:55:12.724455", + "step": 6965, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016400327440351248, + "timestamp": "2025-09-10 02:55:12.737061", + "step": 6966, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:55:12.790206", + "step": 6966, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003185332752764225, + "timestamp": "2025-09-10 02:55:12.798166", + "step": 6967, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:12.851623", + "step": 6967, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008124056039378047, + "timestamp": "2025-09-10 02:55:12.857604", + "step": 6968, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:12.910816", + "step": 6968, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013546434231102467, + "timestamp": "2025-09-10 02:55:12.913294", + "step": 6969, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:12.966852", + "step": 6969, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016551214503124356, + "timestamp": "2025-09-10 02:55:12.969785", + "step": 6970, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:55:13.030618", + "step": 6970, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.480200292775407e-05, + "timestamp": "2025-09-10 02:55:13.041533", + "step": 6971, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:13.094803", + "step": 6971, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012193608563393354, + "timestamp": "2025-09-10 02:55:13.100881", + "step": 6972, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:13.154543", + "step": 6972, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.0489126907486934e-05, + "timestamp": "2025-09-10 02:55:13.156783", + "step": 6973, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:13.209917", + "step": 6973, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002034224336966872, + "timestamp": "2025-09-10 02:55:13.212382", + "step": 6974, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:13.266121", + "step": 6974, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006291036261245608, + "timestamp": "2025-09-10 02:55:13.271787", + "step": 6975, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:55:13.326662", + "step": 6975, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034870824310928583, + "timestamp": "2025-09-10 02:55:13.333131", + "step": 6976, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:55:13.398463", + "step": 6976, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002291430253535509, + "timestamp": "2025-09-10 02:55:13.411674", + "step": 6977, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:13.466524", + "step": 6977, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.7963301514973864e-05, + "timestamp": "2025-09-10 02:55:13.468840", + "step": 6978, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:55:13.522545", + "step": 6978, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013603951083496213, + "timestamp": "2025-09-10 02:55:13.529891", + "step": 6979, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:55:13.588390", + "step": 6979, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023517789668403566, + "timestamp": "2025-09-10 02:55:13.599601", + "step": 6980, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:55:13.653341", + "step": 6980, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038756770663894713, + "timestamp": "2025-09-10 02:55:13.663094", + "step": 6981, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:13.720480", + "step": 6981, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.3510049939213786e-05, + "timestamp": "2025-09-10 02:55:13.723039", + "step": 6982, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:55:13.776891", + "step": 6982, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.902151052490808e-05, + "timestamp": "2025-09-10 02:55:13.786457", + "step": 6983, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:13.840148", + "step": 6983, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02695157565176487, + "timestamp": "2025-09-10 02:55:13.846632", + "step": 6984, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:55:13.903806", + "step": 6984, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.4220677005359903e-05, + "timestamp": "2025-09-10 02:55:13.915046", + "step": 6985, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:55:13.970313", + "step": 6985, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.140782690839842e-05, + "timestamp": "2025-09-10 02:55:13.980137", + "step": 6986, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:14.033553", + "step": 6986, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000857262930367142, + "timestamp": "2025-09-10 02:55:14.035730", + "step": 6987, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:55:14.088780", + "step": 6987, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.702000190562103e-05, + "timestamp": "2025-09-10 02:55:14.095016", + "step": 6988, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:55:14.147885", + "step": 6988, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.4767555512662511e-05, + "timestamp": "2025-09-10 02:55:14.149966", + "step": 6989, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:14.204038", + "step": 6989, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014243644545786083, + "timestamp": "2025-09-10 02:55:14.206222", + "step": 6990, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:14.260411", + "step": 6990, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014196685515344143, + "timestamp": "2025-09-10 02:55:14.262779", + "step": 6991, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:14.316844", + "step": 6991, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042847523582167923, + "timestamp": "2025-09-10 02:55:14.323064", + "step": 6992, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:14.376465", + "step": 6992, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003668736608233303, + "timestamp": "2025-09-10 02:55:14.378674", + "step": 6993, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:14.431918", + "step": 6993, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011432624887675047, + "timestamp": "2025-09-10 02:55:14.438385", + "step": 6994, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:14.491502", + "step": 6994, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.136528438422829e-05, + "timestamp": "2025-09-10 02:55:14.493868", + "step": 6995, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:55:14.555636", + "step": 6995, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00809055007994175, + "timestamp": "2025-09-10 02:55:14.567515", + "step": 6996, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:55:14.627633", + "step": 6996, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.337383769685403e-05, + "timestamp": "2025-09-10 02:55:14.639405", + "step": 6997, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:55:14.692953", + "step": 6997, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.763316145632416e-06, + "timestamp": "2025-09-10 02:55:14.695022", + "step": 6998, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 7360044747200.0 + }, + "timestamp": "2025-09-10 02:55:14.755818", + "step": 6998, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019043213978875428, + "timestamp": "2025-09-10 02:55:14.766731", + "step": 6999, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:14.822155", + "step": 6999, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.073474989738315e-05, + "timestamp": "2025-09-10 02:55:14.828520", + "step": 7000, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 7000", + "timestamp": "2025-09-10 02:55:15.191436", + "step": 7000, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:15.248007", + "step": 7000, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.3104588005226105e-05, + "timestamp": "2025-09-10 02:55:15.250321", + "step": 7001, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:55:15.304074", + "step": 7001, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.853674338140991e-06, + "timestamp": "2025-09-10 02:55:15.306225", + "step": 7002, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:15.359508", + "step": 7002, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04805157333612442, + "timestamp": "2025-09-10 02:55:15.361687", + "step": 7003, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:55:15.414620", + "step": 7003, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040699567762203515, + "timestamp": "2025-09-10 02:55:15.421047", + "step": 7004, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:55:15.474609", + "step": 7004, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.4563810913823545e-05, + "timestamp": "2025-09-10 02:55:15.485141", + "step": 7005, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:15.538890", + "step": 7005, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.641797856194898e-05, + "timestamp": "2025-09-10 02:55:15.540985", + "step": 7006, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:15.594812", + "step": 7006, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.418989636993501e-06, + "timestamp": "2025-09-10 02:55:15.596952", + "step": 7007, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:15.653036", + "step": 7007, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00425726268440485, + "timestamp": "2025-09-10 02:55:15.659344", + "step": 7008, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:15.712439", + "step": 7008, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0047266217879951, + "timestamp": "2025-09-10 02:55:15.718601", + "step": 7009, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:15.772224", + "step": 7009, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004258487839251757, + "timestamp": "2025-09-10 02:55:15.774695", + "step": 7010, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 2880017550912.0 + }, + "timestamp": "2025-09-10 02:55:15.827970", + "step": 7010, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001172769581899047, + "timestamp": "2025-09-10 02:55:15.830465", + "step": 7011, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:15.884660", + "step": 7011, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013297060504555702, + "timestamp": "2025-09-10 02:55:15.891424", + "step": 7012, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:15.944408", + "step": 7012, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.664418080821633e-05, + "timestamp": "2025-09-10 02:55:15.946816", + "step": 7013, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:55:16.000891", + "step": 7013, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.392694871872663e-05, + "timestamp": "2025-09-10 02:55:16.009053", + "step": 7014, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:55:16.062988", + "step": 7014, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.8508600987843238e-05, + "timestamp": "2025-09-10 02:55:16.065254", + "step": 7015, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:55:16.118610", + "step": 7015, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.0045929229818285e-05, + "timestamp": "2025-09-10 02:55:16.124692", + "step": 7016, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:55:16.177581", + "step": 7016, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.157526564085856e-05, + "timestamp": "2025-09-10 02:55:16.185821", + "step": 7017, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:55:16.244377", + "step": 7017, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013229567557573318, + "timestamp": "2025-09-10 02:55:16.254816", + "step": 7018, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:16.309304", + "step": 7018, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016287763137370348, + "timestamp": "2025-09-10 02:55:16.311971", + "step": 7019, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 8000048632384.0 + }, + "timestamp": "2025-09-10 02:55:16.378558", + "step": 7019, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005435793427750468, + "timestamp": "2025-09-10 02:55:16.391546", + "step": 7020, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:16.444436", + "step": 7020, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043971644481644034, + "timestamp": "2025-09-10 02:55:16.447213", + "step": 7021, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 9600058345344.0 + }, + "timestamp": "2025-09-10 02:55:16.521865", + "step": 7021, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.038626980036497116, + "timestamp": "2025-09-10 02:55:16.535558", + "step": 7022, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:16.589696", + "step": 7022, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037362458533607423, + "timestamp": "2025-09-10 02:55:16.591925", + "step": 7023, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:16.645143", + "step": 7023, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013430576655082405, + "timestamp": "2025-09-10 02:55:16.651283", + "step": 7024, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:16.704171", + "step": 7024, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.058919306844472885, + "timestamp": "2025-09-10 02:55:16.706448", + "step": 7025, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:16.759685", + "step": 7025, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005710253608413041, + "timestamp": "2025-09-10 02:55:16.761909", + "step": 7026, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:16.814670", + "step": 7026, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021847318566869944, + "timestamp": "2025-09-10 02:55:16.816853", + "step": 7027, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:55:16.869723", + "step": 7027, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014488119632005692, + "timestamp": "2025-09-10 02:55:16.878602", + "step": 7028, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:16.931553", + "step": 7028, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017375395691487938, + "timestamp": "2025-09-10 02:55:16.938148", + "step": 7029, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:16.991885", + "step": 7029, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.632833148818463e-05, + "timestamp": "2025-09-10 02:55:16.994073", + "step": 7030, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 2560015608320.0 + }, + "timestamp": "2025-09-10 02:55:17.048979", + "step": 7030, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.6653014831244946e-05, + "timestamp": "2025-09-10 02:55:17.051328", + "step": 7031, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:55:17.108652", + "step": 7031, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006913283723406494, + "timestamp": "2025-09-10 02:55:17.115321", + "step": 7032, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 6400038919424.0 + }, + "timestamp": "2025-09-10 02:55:17.170294", + "step": 7032, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040089216781780124, + "timestamp": "2025-09-10 02:55:17.179507", + "step": 7033, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 3200019493504.0 + }, + "timestamp": "2025-09-10 02:55:17.233645", + "step": 7033, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007994318730197847, + "timestamp": "2025-09-10 02:55:17.235844", + "step": 7034, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:17.289023", + "step": 7034, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009965775534510612, + "timestamp": "2025-09-10 02:55:17.291661", + "step": 7035, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:55:17.351879", + "step": 7035, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024551197420805693, + "timestamp": "2025-09-10 02:55:17.363423", + "step": 7036, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:55:17.422979", + "step": 7036, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020564479927998036, + "timestamp": "2025-09-10 02:55:17.434563", + "step": 7037, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:17.487405", + "step": 7037, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021126912906765938, + "timestamp": "2025-09-10 02:55:17.489579", + "step": 7038, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:55:17.544097", + "step": 7038, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007856762385927141, + "timestamp": "2025-09-10 02:55:17.552167", + "step": 7039, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:17.605563", + "step": 7039, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001274820970138535, + "timestamp": "2025-09-10 02:55:17.612830", + "step": 7040, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 7680046689792.0 + }, + "timestamp": "2025-09-10 02:55:17.672886", + "step": 7040, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003535389667376876, + "timestamp": "2025-09-10 02:55:17.684883", + "step": 7041, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:55:17.758057", + "step": 7041, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012710364535450935, + "timestamp": "2025-09-10 02:55:17.771541", + "step": 7042, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:17.825197", + "step": 7042, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010614634957164526, + "timestamp": "2025-09-10 02:55:17.827335", + "step": 7043, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:17.880128", + "step": 7043, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012138697638874874, + "timestamp": "2025-09-10 02:55:17.885939", + "step": 7044, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:55:17.942902", + "step": 7044, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.633704394218512e-06, + "timestamp": "2025-09-10 02:55:17.954099", + "step": 7045, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 5440033091648.0 + }, + "timestamp": "2025-09-10 02:55:18.008105", + "step": 7045, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007055842434056103, + "timestamp": "2025-09-10 02:55:18.014439", + "step": 7046, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:18.069170", + "step": 7046, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.52599420491606e-05, + "timestamp": "2025-09-10 02:55:18.071302", + "step": 7047, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 6080036976832.0 + }, + "timestamp": "2025-09-10 02:55:18.125948", + "step": 7047, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012941339809913188, + "timestamp": "2025-09-10 02:55:18.136344", + "step": 7048, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:55:18.189350", + "step": 7048, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006682535749860108, + "timestamp": "2025-09-10 02:55:18.191467", + "step": 7049, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:18.244383", + "step": 7049, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012835065717808902, + "timestamp": "2025-09-10 02:55:18.246650", + "step": 7050, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 4160025321280.0 + }, + "timestamp": "2025-09-10 02:55:18.299891", + "step": 7050, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.6844728785799816e-05, + "timestamp": "2025-09-10 02:55:18.301962", + "step": 7051, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 9280056402752.0 + }, + "timestamp": "2025-09-10 02:55:18.375534", + "step": 7051, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.747875497443601e-05, + "timestamp": "2025-09-10 02:55:18.389709", + "step": 7052, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 4480027263872.0 + }, + "timestamp": "2025-09-10 02:55:18.443197", + "step": 7052, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000303348817396909, + "timestamp": "2025-09-10 02:55:18.445500", + "step": 7053, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:18.499880", + "step": 7053, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007727089687250555, + "timestamp": "2025-09-10 02:55:18.502413", + "step": 7054, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 6720040862016.0 + }, + "timestamp": "2025-09-10 02:55:18.560617", + "step": 7054, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004567324067465961, + "timestamp": "2025-09-10 02:55:18.571032", + "step": 7055, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 8960054460160.0 + }, + "timestamp": "2025-09-10 02:55:18.641154", + "step": 7055, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.030527567490935326, + "timestamp": "2025-09-10 02:55:18.654786", + "step": 7056, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:55:35.608707", + "step": 7056, + "epoch": 3 + }, + { + "type": "pplx", + "content": 18865554.804102868, + "timestamp": "2025-09-10 02:55:35.611908", + "step": 7056, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:35.665244", + "step": 7056, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037813952076248825, + "timestamp": "2025-09-10 02:55:35.668899", + "step": 7057, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:35.725023", + "step": 7057, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026767307426780462, + "timestamp": "2025-09-10 02:55:35.727564", + "step": 7058, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 5120031149056.0 + }, + "timestamp": "2025-09-10 02:55:35.780593", + "step": 7058, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.191412386309821e-05, + "timestamp": "2025-09-10 02:55:35.783382", + "step": 7059, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:35.837116", + "step": 7059, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000373539311112836, + "timestamp": "2025-09-10 02:55:35.843242", + "step": 7060, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:35.896428", + "step": 7060, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003901652991771698, + "timestamp": "2025-09-10 02:55:35.898665", + "step": 7061, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:55:35.952786", + "step": 7061, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.882033630972728e-05, + "timestamp": "2025-09-10 02:55:35.960079", + "step": 7062, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 3520021436096.0 + }, + "timestamp": "2025-09-10 02:55:36.013523", + "step": 7062, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002307756105437875, + "timestamp": "2025-09-10 02:55:36.015673", + "step": 7063, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 7040042804608.0 + }, + "timestamp": "2025-09-10 02:55:36.075695", + "step": 7063, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011797071201726794, + "timestamp": "2025-09-10 02:55:36.087167", + "step": 7064, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:36.140394", + "step": 7064, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024234021548181772, + "timestamp": "2025-09-10 02:55:36.142781", + "step": 7065, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 13120079713856.0 + }, + "timestamp": "2025-09-10 02:55:36.238890", + "step": 7065, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002509126497898251, + "timestamp": "2025-09-10 02:55:36.257356", + "step": 7066, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 5760035034240.0 + }, + "timestamp": "2025-09-10 02:55:36.311123", + "step": 7066, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013593490235507488, + "timestamp": "2025-09-10 02:55:36.319015", + "step": 7067, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 4800029206464.0 + }, + "timestamp": "2025-09-10 02:55:36.372160", + "step": 7067, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026906438870355487, + "timestamp": "2025-09-10 02:55:36.378055", + "step": 7068, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 3840023378688.0 + }, + "timestamp": "2025-09-10 02:55:36.431156", + "step": 7068, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004143257159739733, + "timestamp": "2025-09-10 02:55:36.433349", + "step": 7069, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 10560064173120.0 + }, + "timestamp": "2025-09-10 02:55:36.513985", + "step": 7069, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.296109495451674e-05, + "timestamp": "2025-09-10 02:55:36.528928", + "step": 7070, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 3, + 224 + ], + "flops": 3360020475552.0 + }, + "timestamp": "2025-09-10 02:55:36.591269", + "step": 7070, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.570770635036752e-05, + "timestamp": "2025-09-10 02:55:36.593516", + "step": 7071, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14691612894976 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9581486694144 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12455932682112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8623338031488 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10859018244352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 13094698457216 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2555063168000 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9900869581696 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8303955143936 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7984572256384 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 7026423593728 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5748892043520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2874446055552 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9262103806592 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8942720919040 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11178401131904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6387657818624 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6068274931072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11497784019456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7345806481280 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23314950858880 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5110126268416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3193828943104 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4790743380864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12775315569664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5429509155968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4471360493312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7665189368832 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3832594718208 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6707040706176 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3513211830656 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4151977605760 + } + ], + "timestamp": "2025-09-10 02:55:53.504071", + "step": 7071, + "epoch": 3 + }, + { + "type": "pplx", + "content": 18515583.117156155, + "timestamp": "2025-09-10 02:55:53.506885", + "step": 7071, + "epoch": 3 + }, + { + "type": "best_pplx", + "content": 18397125.397900093, + "timestamp": "2025-09-10 02:55:53.508374", + "step": 7071, + "epoch": 3 + }, + { + "type": "best_step", + "content": 5292, + "timestamp": "2025-09-10 02:55:53.509782", + "step": 7071, + "epoch": 3 + }, + { + "type": "total_pplx_flops", + "content": 106689854968838400, + "timestamp": "2025-09-10 02:55:53.511030", + "step": 7071, + "epoch": 3 + }, + { + "type": "total_train_flops", + "content": 3.61888601646767e+16, + "timestamp": "2025-09-10 02:55:53.512770", + "step": 7071, + "epoch": 3 + } + ], + "best_evals": { + "pplx": { + "score": 18397125.397900093, + "step": 5292 + }, + "rougel": { + "precision": 0.8192660550458716, + "recall": 0.8192660550458716, + "fmeasure": 0.8192660550458716 + } + } +} \ No newline at end of file