{ "training_args": { "output_dir": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters_2/qa_logi_qa_ff_v1", "overwrite_output_dir": false, "do_train": false, "do_eval": true, "do_predict": false, "eval_strategy": "steps", "prediction_loss_only": false, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 8, "per_gpu_train_batch_size": null, "per_gpu_eval_batch_size": null, "gradient_accumulation_steps": 4, "eval_accumulation_steps": null, "eval_delay": 0, "torch_empty_cache_steps": null, "learning_rate": 2e-05, "weight_decay": 0.0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1.0, "num_train_epochs": 3, "max_steps": -1, "lr_scheduler_type": "linear", "lr_scheduler_kwargs": {}, "warmup_ratio": 0.0, "warmup_steps": 0, "log_level": "passive", "log_level_replica": "warning", "log_on_each_node": true, "logging_dir": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters_2/qa_logi_qa_ff_v1/runs/Sep30_22-12-37_gx10", "logging_strategy": "steps", "logging_first_step": false, "logging_steps": 20, "logging_nan_inf_filter": true, "save_strategy": "epoch", "save_steps": 500, "save_total_limit": null, "save_safetensors": true, "save_on_each_node": false, "save_only_model": false, "restore_callback_states_from_checkpoint": false, "no_cuda": false, "use_cpu": false, "use_mps_device": false, "seed": 42, "data_seed": null, "jit_mode_eval": false, "use_ipex": false, "bf16": false, "fp16": false, "fp16_opt_level": "O1", "half_precision_backend": "auto", "bf16_full_eval": false, "fp16_full_eval": false, "tf32": null, "local_rank": 0, "ddp_backend": null, "tpu_num_cores": null, "tpu_metrics_debug": false, "debug": [], "dataloader_drop_last": false, "eval_steps": 115, "dataloader_num_workers": 0, "dataloader_prefetch_factor": null, "past_index": -1, "run_name": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters_2/qa_logi_qa_ff_v1", "disable_tqdm": false, "remove_unused_columns": true, "label_names": null, "load_best_model_at_end": false, "metric_for_best_model": null, "greater_is_better": null, "ignore_data_skip": false, "fsdp": [], "fsdp_min_num_params": 0, "fsdp_config": { "min_num_params": 0, "xla": false, "xla_fsdp_v2": false, "xla_fsdp_grad_ckpt": false }, "fsdp_transformer_layer_cls_to_wrap": null, "accelerator_config": { "split_batches": false, "dispatch_batches": null, "even_batches": true, "use_seedable_sampler": true, "non_blocking": false, "gradient_accumulation_kwargs": null }, "deepspeed": null, "label_smoothing_factor": 0.0, "optim": "adamw_torch", "optim_args": null, "adafactor": false, "group_by_length": false, "length_column_name": "length", "report_to": [], "ddp_find_unused_parameters": null, "ddp_bucket_cap_mb": null, "ddp_broadcast_buffers": null, "dataloader_pin_memory": true, "dataloader_persistent_workers": false, "skip_memory_metrics": true, "use_legacy_prediction_loop": false, "push_to_hub": false, "resume_from_checkpoint": null, "hub_model_id": null, "hub_strategy": "every_save", "hub_token": "", "hub_private_repo": null, "hub_always_push": false, "gradient_checkpointing": false, "gradient_checkpointing_kwargs": null, "include_inputs_for_metrics": false, "include_for_metrics": [], "eval_do_concat_batches": true, "fp16_backend": "auto", "push_to_hub_model_id": null, "push_to_hub_organization": null, "push_to_hub_token": "", "mp_parameters": "", "auto_find_batch_size": false, "full_determinism": false, "torchdynamo": null, "ray_scope": "last", "ddp_timeout": 1800, "torch_compile": false, "torch_compile_backend": null, "torch_compile_mode": null, "include_tokens_per_second": false, "include_num_input_tokens_seen": false, "neftune_noise_alpha": null, "optim_target_modules": null, "batch_eval_metrics": false, "eval_on_start": false, "use_liger_kernel": false, "eval_use_gather_object": false, "average_tokens_across_devices": false }, "lora_config": null, "flops": { "eval": 24378677094380800, "train": 49118667663965760, "total": 73497344758346560 }, "total": { "total": 196171.32709000004, "train": 145266.47541, "eval": 50904.85168000001 }, "logs": [ { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:12:50.849475", "step": 0, "epoch": 0 }, { "type": "pplx", "content": 9.2736354286407, "timestamp": "2025-09-30 22:12:50.855801", "step": 0, "epoch": 0 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:12:50.926047", "step": 0, "epoch": 1 }, { "type": "loss", "content": 0.29884400963783264, "timestamp": "2025-09-30 22:12:50.929185", "step": 1, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:12:50.996988", "step": 1, "epoch": 1 }, { "type": "loss", "content": 0.14724677801132202, "timestamp": "2025-09-30 22:12:51.005634", "step": 2, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:12:51.040009", "step": 2, "epoch": 1 }, { "type": "loss", "content": 0.20179437100887299, "timestamp": "2025-09-30 22:12:51.047554", "step": 3, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:51.100118", "step": 3, "epoch": 1 }, { "type": "loss", "content": 0.21899153292179108, "timestamp": "2025-09-30 22:12:51.149852", "step": 4, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:12:51.200265", "step": 4, "epoch": 1 }, { "type": "loss", "content": 0.07468795776367188, "timestamp": "2025-09-30 22:12:51.204880", "step": 5, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:12:51.237744", "step": 5, "epoch": 1 }, { "type": "loss", "content": 0.0699843019247055, "timestamp": "2025-09-30 22:12:51.245151", "step": 6, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:12:51.286084", "step": 6, "epoch": 1 }, { "type": "loss", "content": 0.05495142564177513, "timestamp": "2025-09-30 22:12:51.299347", "step": 7, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:12:51.345859", "step": 7, "epoch": 1 }, { "type": "loss", "content": 0.05214886739850044, "timestamp": "2025-09-30 22:12:51.375264", "step": 8, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:12:51.430739", "step": 8, "epoch": 1 }, { "type": "loss", "content": 0.02578839845955372, "timestamp": "2025-09-30 22:12:51.443160", "step": 9, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:51.477039", "step": 9, "epoch": 1 }, { "type": "loss", "content": 0.020216139033436775, "timestamp": "2025-09-30 22:12:51.488195", "step": 10, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:12:51.524025", "step": 10, "epoch": 1 }, { "type": "loss", "content": 0.0157146155834198, "timestamp": "2025-09-30 22:12:51.537275", "step": 11, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:12:51.574585", "step": 11, "epoch": 1 }, { "type": "loss", "content": 0.029712101444602013, "timestamp": "2025-09-30 22:12:51.608772", "step": 12, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:12:51.642438", "step": 12, "epoch": 1 }, { "type": "loss", "content": 0.022363366559147835, "timestamp": "2025-09-30 22:12:51.647653", "step": 13, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:51.697402", "step": 13, "epoch": 1 }, { "type": "loss", "content": 0.026609627529978752, "timestamp": "2025-09-30 22:12:51.701911", "step": 14, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:51.740576", "step": 14, "epoch": 1 }, { "type": "loss", "content": 0.018716558814048767, "timestamp": "2025-09-30 22:12:51.751439", "step": 15, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:12:51.788868", "step": 15, "epoch": 1 }, { "type": "loss", "content": 0.014440740458667278, "timestamp": "2025-09-30 22:12:51.823034", "step": 16, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:12:51.873571", "step": 16, "epoch": 1 }, { "type": "loss", "content": 0.042656391859054565, "timestamp": "2025-09-30 22:12:51.878131", "step": 17, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:12:51.912452", "step": 17, "epoch": 1 }, { "type": "loss", "content": 0.02184998244047165, "timestamp": "2025-09-30 22:12:51.924864", "step": 18, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:12:51.975147", "step": 18, "epoch": 1 }, { "type": "loss", "content": 0.03424403443932533, "timestamp": "2025-09-30 22:12:51.980685", "step": 19, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:52.015920", "step": 19, "epoch": 1 }, { "type": "loss", "content": 0.03647345304489136, "timestamp": "2025-09-30 22:12:52.044551", "step": 20, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:52.085277", "step": 20, "epoch": 1 }, { "type": "loss", "content": 0.027352726086974144, "timestamp": "2025-09-30 22:12:52.090570", "step": 21, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:52.126546", "step": 21, "epoch": 1 }, { "type": "loss", "content": 0.035634566098451614, "timestamp": "2025-09-30 22:12:52.134526", "step": 22, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:12:52.179890", "step": 22, "epoch": 1 }, { "type": "loss", "content": 0.034287016838788986, "timestamp": "2025-09-30 22:12:52.191422", "step": 23, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:12:52.225650", "step": 23, "epoch": 1 }, { "type": "loss", "content": 0.036502204835414886, "timestamp": "2025-09-30 22:12:52.256684", "step": 24, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:12:52.304118", "step": 24, "epoch": 1 }, { "type": "loss", "content": 0.014664776623249054, "timestamp": "2025-09-30 22:12:52.320999", "step": 25, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:12:52.359135", "step": 25, "epoch": 1 }, { "type": "loss", "content": 0.03228773921728134, "timestamp": "2025-09-30 22:12:52.366322", "step": 26, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:12:52.408202", "step": 26, "epoch": 1 }, { "type": "loss", "content": 0.015143612399697304, "timestamp": "2025-09-30 22:12:52.421938", "step": 27, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:12:52.457339", "step": 27, "epoch": 1 }, { "type": "loss", "content": 0.025396913290023804, "timestamp": "2025-09-30 22:12:52.490548", "step": 28, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:12:52.525372", "step": 28, "epoch": 1 }, { "type": "loss", "content": 0.016927070915699005, "timestamp": "2025-09-30 22:12:52.536003", "step": 29, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:12:52.580823", "step": 29, "epoch": 1 }, { "type": "loss", "content": 0.01682448200881481, "timestamp": "2025-09-30 22:12:52.594176", "step": 30, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:52.626284", "step": 30, "epoch": 1 }, { "type": "loss", "content": 0.041514765471220016, "timestamp": "2025-09-30 22:12:52.637451", "step": 31, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:52.673962", "step": 31, "epoch": 1 }, { "type": "loss", "content": 0.009538545273244381, "timestamp": "2025-09-30 22:12:52.702975", "step": 32, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:12:52.746096", "step": 32, "epoch": 1 }, { "type": "loss", "content": 0.0234337467700243, "timestamp": "2025-09-30 22:12:52.758998", "step": 33, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:52.793611", "step": 33, "epoch": 1 }, { "type": "loss", "content": 0.031635984778404236, "timestamp": "2025-09-30 22:12:52.804746", "step": 34, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:12:52.842929", "step": 34, "epoch": 1 }, { "type": "loss", "content": 0.022598372772336006, "timestamp": "2025-09-30 22:12:52.856537", "step": 35, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:12:52.895367", "step": 35, "epoch": 1 }, { "type": "loss", "content": 0.019552720710635185, "timestamp": "2025-09-30 22:12:52.929911", "step": 36, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:52.965015", "step": 36, "epoch": 1 }, { "type": "loss", "content": 0.025182483717799187, "timestamp": "2025-09-30 22:12:52.970612", "step": 37, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:53.003075", "step": 37, "epoch": 1 }, { "type": "loss", "content": 0.017326978966593742, "timestamp": "2025-09-30 22:12:53.014247", "step": 38, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:12:53.046891", "step": 38, "epoch": 1 }, { "type": "loss", "content": 0.022275006398558617, "timestamp": "2025-09-30 22:12:53.053931", "step": 39, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:12:53.085671", "step": 39, "epoch": 1 }, { "type": "loss", "content": 0.032375190407037735, "timestamp": "2025-09-30 22:12:53.113469", "step": 40, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:53.146897", "step": 40, "epoch": 1 }, { "type": "loss", "content": 0.02389678917825222, "timestamp": "2025-09-30 22:12:53.152448", "step": 41, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:12:53.184436", "step": 41, "epoch": 1 }, { "type": "loss", "content": 0.025564173236489296, "timestamp": "2025-09-30 22:12:53.194630", "step": 42, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:12:53.242138", "step": 42, "epoch": 1 }, { "type": "loss", "content": 0.010295093059539795, "timestamp": "2025-09-30 22:12:53.259901", "step": 43, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:12:53.299768", "step": 43, "epoch": 1 }, { "type": "loss", "content": 0.011783291585743427, "timestamp": "2025-09-30 22:12:53.334630", "step": 44, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:12:53.385571", "step": 44, "epoch": 1 }, { "type": "loss", "content": 0.012898216024041176, "timestamp": "2025-09-30 22:12:53.396166", "step": 45, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:12:53.433002", "step": 45, "epoch": 1 }, { "type": "loss", "content": 0.023097814992070198, "timestamp": "2025-09-30 22:12:53.445410", "step": 46, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:53.479343", "step": 46, "epoch": 1 }, { "type": "loss", "content": 0.014813568443059921, "timestamp": "2025-09-30 22:12:53.487286", "step": 47, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:53.528526", "step": 47, "epoch": 1 }, { "type": "loss", "content": 0.018811291083693504, "timestamp": "2025-09-30 22:12:53.560273", "step": 48, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:53.595124", "step": 48, "epoch": 1 }, { "type": "loss", "content": 0.02083074115216732, "timestamp": "2025-09-30 22:12:53.600481", "step": 49, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:53.636287", "step": 49, "epoch": 1 }, { "type": "loss", "content": 0.02495446801185608, "timestamp": "2025-09-30 22:12:53.647467", "step": 50, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:12:53.681179", "step": 50, "epoch": 1 }, { "type": "loss", "content": 0.04506039246916771, "timestamp": "2025-09-30 22:12:53.688585", "step": 51, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:12:53.726307", "step": 51, "epoch": 1 }, { "type": "loss", "content": 0.018953822553157806, "timestamp": "2025-09-30 22:12:53.760527", "step": 52, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:12:53.795049", "step": 52, "epoch": 1 }, { "type": "loss", "content": 0.011422491632401943, "timestamp": "2025-09-30 22:12:53.808178", "step": 53, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:53.842054", "step": 53, "epoch": 1 }, { "type": "loss", "content": 0.01761559024453163, "timestamp": "2025-09-30 22:12:53.853011", "step": 54, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:53.898729", "step": 54, "epoch": 1 }, { "type": "loss", "content": 0.026785332709550858, "timestamp": "2025-09-30 22:12:53.909680", "step": 55, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:12:53.944445", "step": 55, "epoch": 1 }, { "type": "loss", "content": 0.04988979548215866, "timestamp": "2025-09-30 22:12:53.977479", "step": 56, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:12:54.011718", "step": 56, "epoch": 1 }, { "type": "loss", "content": 0.01828792318701744, "timestamp": "2025-09-30 22:12:54.022110", "step": 57, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:54.053405", "step": 57, "epoch": 1 }, { "type": "loss", "content": 0.015893779695034027, "timestamp": "2025-09-30 22:12:54.064436", "step": 58, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:12:54.105945", "step": 58, "epoch": 1 }, { "type": "loss", "content": 0.03437896445393562, "timestamp": "2025-09-30 22:12:54.116986", "step": 59, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:12:54.152445", "step": 59, "epoch": 1 }, { "type": "loss", "content": 0.02998773194849491, "timestamp": "2025-09-30 22:12:54.180234", "step": 60, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:12:54.226681", "step": 60, "epoch": 1 }, { "type": "loss", "content": 0.01960010826587677, "timestamp": "2025-09-30 22:12:54.232880", "step": 61, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:12:54.267808", "step": 61, "epoch": 1 }, { "type": "loss", "content": 0.018762405961751938, "timestamp": "2025-09-30 22:12:54.275082", "step": 62, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:12:54.311713", "step": 62, "epoch": 1 }, { "type": "loss", "content": 0.011765447445213795, "timestamp": "2025-09-30 22:12:54.323976", "step": 63, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:12:54.362167", "step": 63, "epoch": 1 }, { "type": "loss", "content": 0.010820180177688599, "timestamp": "2025-09-30 22:12:54.396948", "step": 64, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:12:54.439330", "step": 64, "epoch": 1 }, { "type": "loss", "content": 0.012286880984902382, "timestamp": "2025-09-30 22:12:54.451483", "step": 65, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:12:54.484117", "step": 65, "epoch": 1 }, { "type": "loss", "content": 0.022625945508480072, "timestamp": "2025-09-30 22:12:54.491206", "step": 66, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:12:54.531290", "step": 66, "epoch": 1 }, { "type": "loss", "content": 0.022299209609627724, "timestamp": "2025-09-30 22:12:54.543461", "step": 67, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:12:54.580701", "step": 67, "epoch": 1 }, { "type": "loss", "content": 0.011945872567594051, "timestamp": "2025-09-30 22:12:54.613935", "step": 68, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:54.648454", "step": 68, "epoch": 1 }, { "type": "loss", "content": 0.011568807996809483, "timestamp": "2025-09-30 22:12:54.657077", "step": 69, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:12:54.694076", "step": 69, "epoch": 1 }, { "type": "loss", "content": 0.012823755852878094, "timestamp": "2025-09-30 22:12:54.707849", "step": 70, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:54.743116", "step": 70, "epoch": 1 }, { "type": "loss", "content": 0.016860121861100197, "timestamp": "2025-09-30 22:12:54.751007", "step": 71, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:12:54.784561", "step": 71, "epoch": 1 }, { "type": "loss", "content": 0.018639255315065384, "timestamp": "2025-09-30 22:12:54.817826", "step": 72, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:12:54.851639", "step": 72, "epoch": 1 }, { "type": "loss", "content": 0.014421514235436916, "timestamp": "2025-09-30 22:12:54.859619", "step": 73, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:12:54.893271", "step": 73, "epoch": 1 }, { "type": "loss", "content": 0.012593203224241734, "timestamp": "2025-09-30 22:12:54.905702", "step": 74, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:54.939109", "step": 74, "epoch": 1 }, { "type": "loss", "content": 0.013217393308877945, "timestamp": "2025-09-30 22:12:54.950062", "step": 75, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:12:54.983172", "step": 75, "epoch": 1 }, { "type": "loss", "content": 0.019085409119725227, "timestamp": "2025-09-30 22:12:55.011569", "step": 76, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:12:55.048985", "step": 76, "epoch": 1 }, { "type": "loss", "content": 0.01689778082072735, "timestamp": "2025-09-30 22:12:55.058655", "step": 77, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:12:55.093483", "step": 77, "epoch": 1 }, { "type": "loss", "content": 0.018813522532582283, "timestamp": "2025-09-30 22:12:55.105858", "step": 78, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:55.146469", "step": 78, "epoch": 1 }, { "type": "loss", "content": 0.013966727070510387, "timestamp": "2025-09-30 22:12:55.157176", "step": 79, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:12:55.193670", "step": 79, "epoch": 1 }, { "type": "loss", "content": 0.019746946170926094, "timestamp": "2025-09-30 22:12:55.221316", "step": 80, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:12:55.256918", "step": 80, "epoch": 1 }, { "type": "loss", "content": 0.023544225841760635, "timestamp": "2025-09-30 22:12:55.264560", "step": 81, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:55.297812", "step": 81, "epoch": 1 }, { "type": "loss", "content": 0.016823606565594673, "timestamp": "2025-09-30 22:12:55.305425", "step": 82, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:12:55.348005", "step": 82, "epoch": 1 }, { "type": "loss", "content": 0.01522775087505579, "timestamp": "2025-09-30 22:12:55.361382", "step": 83, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:12:55.411320", "step": 83, "epoch": 1 }, { "type": "loss", "content": 0.008765741251409054, "timestamp": "2025-09-30 22:12:55.445452", "step": 84, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:12:55.488284", "step": 84, "epoch": 1 }, { "type": "loss", "content": 0.013270394876599312, "timestamp": "2025-09-30 22:12:55.500902", "step": 85, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:12:55.544283", "step": 85, "epoch": 1 }, { "type": "loss", "content": 0.017209267243742943, "timestamp": "2025-09-30 22:12:55.558140", "step": 86, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:12:55.601486", "step": 86, "epoch": 1 }, { "type": "loss", "content": 0.01655328832566738, "timestamp": "2025-09-30 22:12:55.615091", "step": 87, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:12:55.677369", "step": 87, "epoch": 1 }, { "type": "loss", "content": 0.014354526065289974, "timestamp": "2025-09-30 22:12:55.711652", "step": 88, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:12:55.789119", "step": 88, "epoch": 1 }, { "type": "loss", "content": 0.010505059733986855, "timestamp": "2025-09-30 22:12:55.802304", "step": 89, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:12:55.866843", "step": 89, "epoch": 1 }, { "type": "loss", "content": 0.03894902393221855, "timestamp": "2025-09-30 22:12:55.873689", "step": 90, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:12:55.934872", "step": 90, "epoch": 1 }, { "type": "loss", "content": 0.0164689589291811, "timestamp": "2025-09-30 22:12:55.944977", "step": 91, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:56.001555", "step": 91, "epoch": 1 }, { "type": "loss", "content": 0.014508247375488281, "timestamp": "2025-09-30 22:12:56.030211", "step": 92, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:12:56.077037", "step": 92, "epoch": 1 }, { "type": "loss", "content": 0.018277524039149284, "timestamp": "2025-09-30 22:12:56.084836", "step": 93, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:12:56.132303", "step": 93, "epoch": 1 }, { "type": "loss", "content": 0.018098050728440285, "timestamp": "2025-09-30 22:12:56.143357", "step": 94, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:12:56.197899", "step": 94, "epoch": 1 }, { "type": "loss", "content": 0.022050291299819946, "timestamp": "2025-09-30 22:12:56.211512", "step": 95, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:12:56.264204", "step": 95, "epoch": 1 }, { "type": "loss", "content": 0.017889603972434998, "timestamp": "2025-09-30 22:12:56.297166", "step": 96, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:12:56.352176", "step": 96, "epoch": 1 }, { "type": "loss", "content": 0.019431166350841522, "timestamp": "2025-09-30 22:12:56.359950", "step": 97, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:12:56.416477", "step": 97, "epoch": 1 }, { "type": "loss", "content": 0.031271129846572876, "timestamp": "2025-09-30 22:12:56.429068", "step": 98, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:12:56.479355", "step": 98, "epoch": 1 }, { "type": "loss", "content": 0.019475264474749565, "timestamp": "2025-09-30 22:12:56.493070", "step": 99, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:12:56.556543", "step": 99, "epoch": 1 }, { "type": "loss", "content": 0.013815326616168022, "timestamp": "2025-09-30 22:12:56.590701", "step": 100, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:12:56.644790", "step": 100, "epoch": 1 }, { "type": "loss", "content": 0.010819070972502232, "timestamp": "2025-09-30 22:12:56.657871", "step": 101, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:12:56.716229", "step": 101, "epoch": 1 }, { "type": "loss", "content": 0.021440595388412476, "timestamp": "2025-09-30 22:12:56.728604", "step": 102, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:12:56.785093", "step": 102, "epoch": 1 }, { "type": "loss", "content": 0.012968943454325199, "timestamp": "2025-09-30 22:12:56.797463", "step": 103, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:12:56.849593", "step": 103, "epoch": 1 }, { "type": "loss", "content": 0.02394869178533554, "timestamp": "2025-09-30 22:12:56.879534", "step": 104, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:12:56.941585", "step": 104, "epoch": 1 }, { "type": "loss", "content": 0.023614834994077682, "timestamp": "2025-09-30 22:12:56.949565", "step": 105, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:12:57.012662", "step": 105, "epoch": 1 }, { "type": "loss", "content": 0.018549881875514984, "timestamp": "2025-09-30 22:12:57.026546", "step": 106, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:12:57.077639", "step": 106, "epoch": 1 }, { "type": "loss", "content": 0.014040866866707802, "timestamp": "2025-09-30 22:12:57.090117", "step": 107, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:12:57.134079", "step": 107, "epoch": 1 }, { "type": "loss", "content": 0.01790779083967209, "timestamp": "2025-09-30 22:12:57.168520", "step": 108, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:12:57.206739", "step": 108, "epoch": 1 }, { "type": "loss", "content": 0.013338050805032253, "timestamp": "2025-09-30 22:12:57.211953", "step": 109, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:12:57.251146", "step": 109, "epoch": 1 }, { "type": "loss", "content": 0.02902776561677456, "timestamp": "2025-09-30 22:12:57.263651", "step": 110, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:12:57.307299", "step": 110, "epoch": 1 }, { "type": "loss", "content": 0.013689517974853516, "timestamp": "2025-09-30 22:12:57.323403", "step": 111, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:12:57.359819", "step": 111, "epoch": 1 }, { "type": "loss", "content": 0.01824032887816429, "timestamp": "2025-09-30 22:12:57.392955", "step": 112, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:12:57.443807", "step": 112, "epoch": 1 }, { "type": "loss", "content": 0.01077995914965868, "timestamp": "2025-09-30 22:12:57.457091", "step": 113, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:12:57.490932", "step": 113, "epoch": 1 }, { "type": "loss", "content": 0.01976594887673855, "timestamp": "2025-09-30 22:12:57.499281", "step": 114, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:12:57.536443", "step": 114, "epoch": 1 }, { "type": "loss", "content": 0.03269365429878235, "timestamp": "2025-09-30 22:12:57.545603", "step": 115, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:13:00.036032", "step": 115, "epoch": 1 }, { "type": "pplx", "content": 5.660676883245411, "timestamp": "2025-09-30 22:13:00.039028", "step": 115, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:00.077992", "step": 115, "epoch": 1 }, { "type": "loss", "content": 0.015604332089424133, "timestamp": "2025-09-30 22:13:00.114670", "step": 116, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:00.148349", "step": 116, "epoch": 1 }, { "type": "loss", "content": 0.01104702427983284, "timestamp": "2025-09-30 22:13:00.155849", "step": 117, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:00.188289", "step": 117, "epoch": 1 }, { "type": "loss", "content": 0.022734448313713074, "timestamp": "2025-09-30 22:13:00.200449", "step": 118, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:00.237036", "step": 118, "epoch": 1 }, { "type": "loss", "content": 0.02765716426074505, "timestamp": "2025-09-30 22:13:00.247954", "step": 119, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:00.287836", "step": 119, "epoch": 1 }, { "type": "loss", "content": 0.0134533466771245, "timestamp": "2025-09-30 22:13:00.322507", "step": 120, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:00.363168", "step": 120, "epoch": 1 }, { "type": "loss", "content": 0.03625181317329407, "timestamp": "2025-09-30 22:13:00.376245", "step": 121, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:00.411393", "step": 121, "epoch": 1 }, { "type": "loss", "content": 0.012412041425704956, "timestamp": "2025-09-30 22:13:00.423924", "step": 122, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:00.461573", "step": 122, "epoch": 1 }, { "type": "loss", "content": 0.021080005913972855, "timestamp": "2025-09-30 22:13:00.475408", "step": 123, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:00.517942", "step": 123, "epoch": 1 }, { "type": "loss", "content": 0.011560908518731594, "timestamp": "2025-09-30 22:13:00.554678", "step": 124, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:00.597417", "step": 124, "epoch": 1 }, { "type": "loss", "content": 0.007222126703709364, "timestamp": "2025-09-30 22:13:00.612857", "step": 125, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:13:00.667781", "step": 125, "epoch": 1 }, { "type": "loss", "content": 0.008931437507271767, "timestamp": "2025-09-30 22:13:00.680456", "step": 126, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:00.716445", "step": 126, "epoch": 1 }, { "type": "loss", "content": 0.014612638391554356, "timestamp": "2025-09-30 22:13:00.727428", "step": 127, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 560 ], "flops": 16611393146432 }, "timestamp": "2025-09-30 22:13:00.789762", "step": 127, "epoch": 1 }, { "type": "loss", "content": 0.01625419408082962, "timestamp": "2025-09-30 22:13:00.829962", "step": 128, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:13:00.871015", "step": 128, "epoch": 1 }, { "type": "loss", "content": 0.009241427294909954, "timestamp": "2025-09-30 22:13:00.887706", "step": 129, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:00.925307", "step": 129, "epoch": 1 }, { "type": "loss", "content": 0.013992900028824806, "timestamp": "2025-09-30 22:13:00.939249", "step": 130, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:00.979348", "step": 130, "epoch": 1 }, { "type": "loss", "content": 0.014244504272937775, "timestamp": "2025-09-30 22:13:00.991479", "step": 131, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:01.026529", "step": 131, "epoch": 1 }, { "type": "loss", "content": 0.018886029720306396, "timestamp": "2025-09-30 22:13:01.060707", "step": 132, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:13:01.107858", "step": 132, "epoch": 1 }, { "type": "loss", "content": 0.01401935238391161, "timestamp": "2025-09-30 22:13:01.124565", "step": 133, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-30 22:13:01.174465", "step": 133, "epoch": 1 }, { "type": "loss", "content": 0.00951952300965786, "timestamp": "2025-09-30 22:13:01.191956", "step": 134, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:01.235264", "step": 134, "epoch": 1 }, { "type": "loss", "content": 0.020773863419890404, "timestamp": "2025-09-30 22:13:01.248979", "step": 135, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:01.295943", "step": 135, "epoch": 1 }, { "type": "loss", "content": 0.023069093003869057, "timestamp": "2025-09-30 22:13:01.324159", "step": 136, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:01.359793", "step": 136, "epoch": 1 }, { "type": "loss", "content": 0.0129553796723485, "timestamp": "2025-09-30 22:13:01.367781", "step": 137, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:01.408205", "step": 137, "epoch": 1 }, { "type": "loss", "content": 0.03496166691184044, "timestamp": "2025-09-30 22:13:01.420778", "step": 138, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:01.454505", "step": 138, "epoch": 1 }, { "type": "loss", "content": 0.018751680850982666, "timestamp": "2025-09-30 22:13:01.465528", "step": 139, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:01.510319", "step": 139, "epoch": 1 }, { "type": "loss", "content": 0.022187508642673492, "timestamp": "2025-09-30 22:13:01.539067", "step": 140, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:01.575918", "step": 140, "epoch": 1 }, { "type": "loss", "content": 0.015233765356242657, "timestamp": "2025-09-30 22:13:01.581613", "step": 141, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:01.621741", "step": 141, "epoch": 1 }, { "type": "loss", "content": 0.021425914019346237, "timestamp": "2025-09-30 22:13:01.635086", "step": 142, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:13:01.679480", "step": 142, "epoch": 1 }, { "type": "loss", "content": 0.016796356067061424, "timestamp": "2025-09-30 22:13:01.695600", "step": 143, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:01.740055", "step": 143, "epoch": 1 }, { "type": "loss", "content": 0.017127713188529015, "timestamp": "2025-09-30 22:13:01.768395", "step": 144, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:01.801480", "step": 144, "epoch": 1 }, { "type": "loss", "content": 0.016997547820210457, "timestamp": "2025-09-30 22:13:01.809420", "step": 145, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:01.847471", "step": 145, "epoch": 1 }, { "type": "loss", "content": 0.017635153606534004, "timestamp": "2025-09-30 22:13:01.855288", "step": 146, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:01.890937", "step": 146, "epoch": 1 }, { "type": "loss", "content": 0.015158042311668396, "timestamp": "2025-09-30 22:13:01.898766", "step": 147, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:01.933770", "step": 147, "epoch": 1 }, { "type": "loss", "content": 0.044107209891080856, "timestamp": "2025-09-30 22:13:01.964766", "step": 148, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:01.999535", "step": 148, "epoch": 1 }, { "type": "loss", "content": 0.015536610037088394, "timestamp": "2025-09-30 22:13:02.010143", "step": 149, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:02.052349", "step": 149, "epoch": 1 }, { "type": "loss", "content": 0.04728955402970314, "timestamp": "2025-09-30 22:13:02.063475", "step": 150, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:02.101301", "step": 150, "epoch": 1 }, { "type": "loss", "content": 0.026954729110002518, "timestamp": "2025-09-30 22:13:02.112414", "step": 151, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:02.153727", "step": 151, "epoch": 1 }, { "type": "loss", "content": 0.01647934690117836, "timestamp": "2025-09-30 22:13:02.188629", "step": 152, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:02.229291", "step": 152, "epoch": 1 }, { "type": "loss", "content": 0.013347185216844082, "timestamp": "2025-09-30 22:13:02.239777", "step": 153, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:02.299419", "step": 153, "epoch": 1 }, { "type": "loss", "content": 0.009047807194292545, "timestamp": "2025-09-30 22:13:02.313227", "step": 154, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:02.355995", "step": 154, "epoch": 1 }, { "type": "loss", "content": 0.011396903544664383, "timestamp": "2025-09-30 22:13:02.369721", "step": 155, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:13:02.415920", "step": 155, "epoch": 1 }, { "type": "loss", "content": 0.015042304992675781, "timestamp": "2025-09-30 22:13:02.452462", "step": 156, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:02.498006", "step": 156, "epoch": 1 }, { "type": "loss", "content": 0.022125402465462685, "timestamp": "2025-09-30 22:13:02.507765", "step": 157, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:02.552007", "step": 157, "epoch": 1 }, { "type": "loss", "content": 0.008051242679357529, "timestamp": "2025-09-30 22:13:02.565862", "step": 158, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:02.599230", "step": 158, "epoch": 1 }, { "type": "loss", "content": 0.03221747651696205, "timestamp": "2025-09-30 22:13:02.611517", "step": 159, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:02.662891", "step": 159, "epoch": 1 }, { "type": "loss", "content": 0.025403138250112534, "timestamp": "2025-09-30 22:13:02.697835", "step": 160, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:02.733609", "step": 160, "epoch": 1 }, { "type": "loss", "content": 0.018038174137473106, "timestamp": "2025-09-30 22:13:02.739223", "step": 161, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:02.772084", "step": 161, "epoch": 1 }, { "type": "loss", "content": 0.015591044910252094, "timestamp": "2025-09-30 22:13:02.784563", "step": 162, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:02.817367", "step": 162, "epoch": 1 }, { "type": "loss", "content": 0.020338229835033417, "timestamp": "2025-09-30 22:13:02.828444", "step": 163, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:02.862879", "step": 163, "epoch": 1 }, { "type": "loss", "content": 0.012969471514225006, "timestamp": "2025-09-30 22:13:02.896344", "step": 164, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:02.929993", "step": 164, "epoch": 1 }, { "type": "loss", "content": 0.012057175859808922, "timestamp": "2025-09-30 22:13:02.940689", "step": 165, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:02.975589", "step": 165, "epoch": 1 }, { "type": "loss", "content": 0.01438069622963667, "timestamp": "2025-09-30 22:13:02.985925", "step": 166, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:03.027125", "step": 166, "epoch": 1 }, { "type": "loss", "content": 0.009808840230107307, "timestamp": "2025-09-30 22:13:03.039356", "step": 167, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:03.083029", "step": 167, "epoch": 1 }, { "type": "loss", "content": 0.012758846394717693, "timestamp": "2025-09-30 22:13:03.111707", "step": 168, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:03.146770", "step": 168, "epoch": 1 }, { "type": "loss", "content": 0.017916690558195114, "timestamp": "2025-09-30 22:13:03.152263", "step": 169, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:03.197680", "step": 169, "epoch": 1 }, { "type": "loss", "content": 0.014809397980570793, "timestamp": "2025-09-30 22:13:03.205675", "step": 170, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:03.244913", "step": 170, "epoch": 1 }, { "type": "loss", "content": 0.013050451874732971, "timestamp": "2025-09-30 22:13:03.258603", "step": 171, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:03.294418", "step": 171, "epoch": 1 }, { "type": "loss", "content": 0.013798566535115242, "timestamp": "2025-09-30 22:13:03.323050", "step": 172, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:03.356661", "step": 172, "epoch": 1 }, { "type": "loss", "content": 0.014890993945300579, "timestamp": "2025-09-30 22:13:03.366580", "step": 173, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:03.399001", "step": 173, "epoch": 1 }, { "type": "loss", "content": 0.014647998847067356, "timestamp": "2025-09-30 22:13:03.409578", "step": 174, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:03.447934", "step": 174, "epoch": 1 }, { "type": "loss", "content": 0.017779076471924782, "timestamp": "2025-09-30 22:13:03.457024", "step": 175, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:03.498102", "step": 175, "epoch": 1 }, { "type": "loss", "content": 0.01286756806075573, "timestamp": "2025-09-30 22:13:03.531262", "step": 176, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:03.570414", "step": 176, "epoch": 1 }, { "type": "loss", "content": 0.010901299305260181, "timestamp": "2025-09-30 22:13:03.583547", "step": 177, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:03.622018", "step": 177, "epoch": 1 }, { "type": "loss", "content": 0.009808521717786789, "timestamp": "2025-09-30 22:13:03.629907", "step": 178, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:13:03.675999", "step": 178, "epoch": 1 }, { "type": "loss", "content": 0.007272894959896803, "timestamp": "2025-09-30 22:13:03.693961", "step": 179, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:03.737094", "step": 179, "epoch": 1 }, { "type": "loss", "content": 0.02019336074590683, "timestamp": "2025-09-30 22:13:03.771301", "step": 180, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:03.806630", "step": 180, "epoch": 1 }, { "type": "loss", "content": 0.012186083011329174, "timestamp": "2025-09-30 22:13:03.817107", "step": 181, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:03.855226", "step": 181, "epoch": 1 }, { "type": "loss", "content": 0.012741667218506336, "timestamp": "2025-09-30 22:13:03.866387", "step": 182, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:03.908037", "step": 182, "epoch": 1 }, { "type": "loss", "content": 0.04050694778561592, "timestamp": "2025-09-30 22:13:03.916065", "step": 183, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:03.950799", "step": 183, "epoch": 1 }, { "type": "loss", "content": 0.013532925397157669, "timestamp": "2025-09-30 22:13:03.984233", "step": 184, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:13:04.035033", "step": 184, "epoch": 1 }, { "type": "loss", "content": 0.022425033152103424, "timestamp": "2025-09-30 22:13:04.043562", "step": 185, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:04.082249", "step": 185, "epoch": 1 }, { "type": "loss", "content": 0.011872397735714912, "timestamp": "2025-09-30 22:13:04.092620", "step": 186, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:04.134065", "step": 186, "epoch": 1 }, { "type": "loss", "content": 0.02194029651582241, "timestamp": "2025-09-30 22:13:04.147721", "step": 187, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:04.182162", "step": 187, "epoch": 1 }, { "type": "loss", "content": 0.03069671429693699, "timestamp": "2025-09-30 22:13:04.210421", "step": 188, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:04.245500", "step": 188, "epoch": 1 }, { "type": "loss", "content": 0.010479142889380455, "timestamp": "2025-09-30 22:13:04.258121", "step": 189, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:04.290163", "step": 189, "epoch": 1 }, { "type": "loss", "content": 0.01092996820807457, "timestamp": "2025-09-30 22:13:04.302722", "step": 190, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:04.342405", "step": 190, "epoch": 1 }, { "type": "loss", "content": 0.01756451278924942, "timestamp": "2025-09-30 22:13:04.358207", "step": 191, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:04.392179", "step": 191, "epoch": 1 }, { "type": "loss", "content": 0.009152082726359367, "timestamp": "2025-09-30 22:13:04.425573", "step": 192, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:04.456335", "step": 192, "epoch": 1 }, { "type": "loss", "content": 0.01600995659828186, "timestamp": "2025-09-30 22:13:04.460885", "step": 193, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:04.501867", "step": 193, "epoch": 1 }, { "type": "loss", "content": 0.015879737213253975, "timestamp": "2025-09-30 22:13:04.512284", "step": 194, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:04.555622", "step": 194, "epoch": 1 }, { "type": "loss", "content": 0.011121682822704315, "timestamp": "2025-09-30 22:13:04.571589", "step": 195, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:04.608940", "step": 195, "epoch": 1 }, { "type": "loss", "content": 0.01052766665816307, "timestamp": "2025-09-30 22:13:04.643546", "step": 196, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:04.680449", "step": 196, "epoch": 1 }, { "type": "loss", "content": 0.021621566265821457, "timestamp": "2025-09-30 22:13:04.693157", "step": 197, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:04.726546", "step": 197, "epoch": 1 }, { "type": "loss", "content": 0.016959909349679947, "timestamp": "2025-09-30 22:13:04.737045", "step": 198, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:04.768814", "step": 198, "epoch": 1 }, { "type": "loss", "content": 0.02073858492076397, "timestamp": "2025-09-30 22:13:04.778992", "step": 199, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:04.822896", "step": 199, "epoch": 1 }, { "type": "loss", "content": 0.013792769983410835, "timestamp": "2025-09-30 22:13:04.854174", "step": 200, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:04.893771", "step": 200, "epoch": 1 }, { "type": "loss", "content": 0.016457555815577507, "timestamp": "2025-09-30 22:13:04.899269", "step": 201, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:04.945889", "step": 201, "epoch": 1 }, { "type": "loss", "content": 0.01595127210021019, "timestamp": "2025-09-30 22:13:04.959324", "step": 202, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:04.996749", "step": 202, "epoch": 1 }, { "type": "loss", "content": 0.005044651683419943, "timestamp": "2025-09-30 22:13:05.010621", "step": 203, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:05.053738", "step": 203, "epoch": 1 }, { "type": "loss", "content": 0.010557775385677814, "timestamp": "2025-09-30 22:13:05.088410", "step": 204, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:05.127278", "step": 204, "epoch": 1 }, { "type": "loss", "content": 0.01743490993976593, "timestamp": "2025-09-30 22:13:05.139857", "step": 205, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:05.173179", "step": 205, "epoch": 1 }, { "type": "loss", "content": 0.021075841039419174, "timestamp": "2025-09-30 22:13:05.185473", "step": 206, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:05.219879", "step": 206, "epoch": 1 }, { "type": "loss", "content": 0.02082311548292637, "timestamp": "2025-09-30 22:13:05.233265", "step": 207, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:05.269150", "step": 207, "epoch": 1 }, { "type": "loss", "content": 0.017491625621914864, "timestamp": "2025-09-30 22:13:05.303842", "step": 208, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:05.342779", "step": 208, "epoch": 1 }, { "type": "loss", "content": 0.015582921914756298, "timestamp": "2025-09-30 22:13:05.350836", "step": 209, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:05.392387", "step": 209, "epoch": 1 }, { "type": "loss", "content": 0.011036443524062634, "timestamp": "2025-09-30 22:13:05.406008", "step": 210, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:05.440800", "step": 210, "epoch": 1 }, { "type": "loss", "content": 0.017281629145145416, "timestamp": "2025-09-30 22:13:05.448849", "step": 211, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:05.485090", "step": 211, "epoch": 1 }, { "type": "loss", "content": 0.015614760108292103, "timestamp": "2025-09-30 22:13:05.519272", "step": 212, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:05.558703", "step": 212, "epoch": 1 }, { "type": "loss", "content": 0.01294607762247324, "timestamp": "2025-09-30 22:13:05.568541", "step": 213, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:05.601656", "step": 213, "epoch": 1 }, { "type": "loss", "content": 0.016707144677639008, "timestamp": "2025-09-30 22:13:05.612699", "step": 214, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:05.645005", "step": 214, "epoch": 1 }, { "type": "loss", "content": 0.018367597833275795, "timestamp": "2025-09-30 22:13:05.657282", "step": 215, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:05.699428", "step": 215, "epoch": 1 }, { "type": "loss", "content": 0.013285262510180473, "timestamp": "2025-09-30 22:13:05.734046", "step": 216, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:05.769701", "step": 216, "epoch": 1 }, { "type": "loss", "content": 0.007377085275948048, "timestamp": "2025-09-30 22:13:05.782322", "step": 217, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:05.836491", "step": 217, "epoch": 1 }, { "type": "loss", "content": 0.010679355822503567, "timestamp": "2025-09-30 22:13:05.849033", "step": 218, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:05.897052", "step": 218, "epoch": 1 }, { "type": "loss", "content": 0.0184700358659029, "timestamp": "2025-09-30 22:13:05.910380", "step": 219, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:05.947423", "step": 219, "epoch": 1 }, { "type": "loss", "content": 0.011153987608850002, "timestamp": "2025-09-30 22:13:05.980686", "step": 220, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:06.015471", "step": 220, "epoch": 1 }, { "type": "loss", "content": 0.009359365329146385, "timestamp": "2025-09-30 22:13:06.034416", "step": 221, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:06.076823", "step": 221, "epoch": 1 }, { "type": "loss", "content": 0.01882639341056347, "timestamp": "2025-09-30 22:13:06.084237", "step": 222, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:06.131449", "step": 222, "epoch": 1 }, { "type": "loss", "content": 0.00970226339995861, "timestamp": "2025-09-30 22:13:06.143890", "step": 223, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:06.189536", "step": 223, "epoch": 1 }, { "type": "loss", "content": 0.018550271168351173, "timestamp": "2025-09-30 22:13:06.218377", "step": 224, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:06.254555", "step": 224, "epoch": 1 }, { "type": "loss", "content": 0.013726242817938328, "timestamp": "2025-09-30 22:13:06.273913", "step": 225, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:06.312668", "step": 225, "epoch": 1 }, { "type": "loss", "content": 0.013593848794698715, "timestamp": "2025-09-30 22:13:06.328512", "step": 226, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:06.366949", "step": 226, "epoch": 1 }, { "type": "loss", "content": 0.017183566465973854, "timestamp": "2025-09-30 22:13:06.374517", "step": 227, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:13:06.424362", "step": 227, "epoch": 1 }, { "type": "loss", "content": 0.010725222527980804, "timestamp": "2025-09-30 22:13:06.462340", "step": 228, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:06.494745", "step": 228, "epoch": 1 }, { "type": "loss", "content": 0.012593534775078297, "timestamp": "2025-09-30 22:13:06.507437", "step": 229, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:06.551389", "step": 229, "epoch": 1 }, { "type": "loss", "content": 0.012523045763373375, "timestamp": "2025-09-30 22:13:06.563627", "step": 230, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:13:08.958662", "step": 230, "epoch": 1 }, { "type": "pplx", "content": 5.586642946954409, "timestamp": "2025-09-30 22:13:08.962071", "step": 230, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:08.993642", "step": 230, "epoch": 1 }, { "type": "loss", "content": 0.01927979476749897, "timestamp": "2025-09-30 22:13:09.000193", "step": 231, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:09.032526", "step": 231, "epoch": 1 }, { "type": "loss", "content": 0.026295769959688187, "timestamp": "2025-09-30 22:13:09.065544", "step": 232, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:09.096533", "step": 232, "epoch": 1 }, { "type": "loss", "content": 0.01865268498659134, "timestamp": "2025-09-30 22:13:09.104636", "step": 233, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:09.143951", "step": 233, "epoch": 1 }, { "type": "loss", "content": 0.012808283790946007, "timestamp": "2025-09-30 22:13:09.156510", "step": 234, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:09.195595", "step": 234, "epoch": 1 }, { "type": "loss", "content": 0.01775694452226162, "timestamp": "2025-09-30 22:13:09.203343", "step": 235, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:09.239789", "step": 235, "epoch": 1 }, { "type": "loss", "content": 0.010759370401501656, "timestamp": "2025-09-30 22:13:09.274016", "step": 236, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:09.307573", "step": 236, "epoch": 1 }, { "type": "loss", "content": 0.01735626719892025, "timestamp": "2025-09-30 22:13:09.316361", "step": 237, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:09.352777", "step": 237, "epoch": 1 }, { "type": "loss", "content": 0.01837928406894207, "timestamp": "2025-09-30 22:13:09.365117", "step": 238, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:09.398660", "step": 238, "epoch": 1 }, { "type": "loss", "content": 0.015187880955636501, "timestamp": "2025-09-30 22:13:09.411008", "step": 239, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:09.450674", "step": 239, "epoch": 1 }, { "type": "loss", "content": 0.013186373747885227, "timestamp": "2025-09-30 22:13:09.484894", "step": 240, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:09.524107", "step": 240, "epoch": 1 }, { "type": "loss", "content": 0.015222107991576195, "timestamp": "2025-09-30 22:13:09.537183", "step": 241, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:09.574072", "step": 241, "epoch": 1 }, { "type": "loss", "content": 0.011916671879589558, "timestamp": "2025-09-30 22:13:09.585349", "step": 242, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:09.626203", "step": 242, "epoch": 1 }, { "type": "loss", "content": 0.019251955673098564, "timestamp": "2025-09-30 22:13:09.637260", "step": 243, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:09.675465", "step": 243, "epoch": 1 }, { "type": "loss", "content": 0.007890812121331692, "timestamp": "2025-09-30 22:13:09.710122", "step": 244, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:09.745171", "step": 244, "epoch": 1 }, { "type": "loss", "content": 0.018134785816073418, "timestamp": "2025-09-30 22:13:09.758138", "step": 245, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:13:09.801735", "step": 245, "epoch": 1 }, { "type": "loss", "content": 0.01059873029589653, "timestamp": "2025-09-30 22:13:09.817338", "step": 246, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:09.865109", "step": 246, "epoch": 1 }, { "type": "loss", "content": 0.009460191242396832, "timestamp": "2025-09-30 22:13:09.878488", "step": 247, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:09.917552", "step": 247, "epoch": 1 }, { "type": "loss", "content": 0.021795684471726418, "timestamp": "2025-09-30 22:13:09.950934", "step": 248, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:09.986449", "step": 248, "epoch": 1 }, { "type": "loss", "content": 0.01156249362975359, "timestamp": "2025-09-30 22:13:09.999765", "step": 249, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:10.049108", "step": 249, "epoch": 1 }, { "type": "loss", "content": 0.011901349760591984, "timestamp": "2025-09-30 22:13:10.065014", "step": 250, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:10.108282", "step": 250, "epoch": 1 }, { "type": "loss", "content": 0.012487679719924927, "timestamp": "2025-09-30 22:13:10.122063", "step": 251, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:10.164909", "step": 251, "epoch": 1 }, { "type": "loss", "content": 0.005352849140763283, "timestamp": "2025-09-30 22:13:10.201746", "step": 252, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:10.243540", "step": 252, "epoch": 1 }, { "type": "loss", "content": 0.01035391166806221, "timestamp": "2025-09-30 22:13:10.253527", "step": 253, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:10.293386", "step": 253, "epoch": 1 }, { "type": "loss", "content": 0.015251671895384789, "timestamp": "2025-09-30 22:13:10.306687", "step": 254, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:10.344026", "step": 254, "epoch": 1 }, { "type": "loss", "content": 0.010876107029616833, "timestamp": "2025-09-30 22:13:10.357750", "step": 255, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:10.390728", "step": 255, "epoch": 1 }, { "type": "loss", "content": 0.022635027766227722, "timestamp": "2025-09-30 22:13:10.422610", "step": 256, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:10.460109", "step": 256, "epoch": 1 }, { "type": "loss", "content": 0.017526494339108467, "timestamp": "2025-09-30 22:13:10.473255", "step": 257, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:10.509052", "step": 257, "epoch": 1 }, { "type": "loss", "content": 0.013761814683675766, "timestamp": "2025-09-30 22:13:10.521293", "step": 258, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:10.567794", "step": 258, "epoch": 1 }, { "type": "loss", "content": 0.017204131931066513, "timestamp": "2025-09-30 22:13:10.581250", "step": 259, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:10.623168", "step": 259, "epoch": 1 }, { "type": "loss", "content": 0.01420542597770691, "timestamp": "2025-09-30 22:13:10.657708", "step": 260, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:13:10.707682", "step": 260, "epoch": 1 }, { "type": "loss", "content": 0.011504081077873707, "timestamp": "2025-09-30 22:13:10.724696", "step": 261, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:10.762850", "step": 261, "epoch": 1 }, { "type": "loss", "content": 0.024398203939199448, "timestamp": "2025-09-30 22:13:10.776246", "step": 262, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:10.809490", "step": 262, "epoch": 1 }, { "type": "loss", "content": 0.028062906116247177, "timestamp": "2025-09-30 22:13:10.817255", "step": 263, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:10.858904", "step": 263, "epoch": 1 }, { "type": "loss", "content": 0.030076058581471443, "timestamp": "2025-09-30 22:13:10.890016", "step": 264, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:10.931202", "step": 264, "epoch": 1 }, { "type": "loss", "content": 0.03159669414162636, "timestamp": "2025-09-30 22:13:10.936910", "step": 265, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:10.973296", "step": 265, "epoch": 1 }, { "type": "loss", "content": 0.015276629477739334, "timestamp": "2025-09-30 22:13:10.985927", "step": 266, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:11.025178", "step": 266, "epoch": 1 }, { "type": "loss", "content": 0.01706337183713913, "timestamp": "2025-09-30 22:13:11.038547", "step": 267, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:11.072954", "step": 267, "epoch": 1 }, { "type": "loss", "content": 0.020861618220806122, "timestamp": "2025-09-30 22:13:11.106343", "step": 268, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:11.142600", "step": 268, "epoch": 1 }, { "type": "loss", "content": 0.023828132078051567, "timestamp": "2025-09-30 22:13:11.153144", "step": 269, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:11.189906", "step": 269, "epoch": 1 }, { "type": "loss", "content": 0.014024309813976288, "timestamp": "2025-09-30 22:13:11.203285", "step": 270, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:13:11.247665", "step": 270, "epoch": 1 }, { "type": "loss", "content": 0.00785061251372099, "timestamp": "2025-09-30 22:13:11.264109", "step": 271, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:11.306283", "step": 271, "epoch": 1 }, { "type": "loss", "content": 0.012670233845710754, "timestamp": "2025-09-30 22:13:11.343004", "step": 272, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:11.377886", "step": 272, "epoch": 1 }, { "type": "loss", "content": 0.012132198549807072, "timestamp": "2025-09-30 22:13:11.391174", "step": 273, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:11.425283", "step": 273, "epoch": 1 }, { "type": "loss", "content": 0.022953467443585396, "timestamp": "2025-09-30 22:13:11.435626", "step": 274, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:13:11.480483", "step": 274, "epoch": 1 }, { "type": "loss", "content": 0.007391286548227072, "timestamp": "2025-09-30 22:13:11.496649", "step": 275, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:11.534934", "step": 275, "epoch": 1 }, { "type": "loss", "content": 0.014905553311109543, "timestamp": "2025-09-30 22:13:11.569477", "step": 276, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:11.603011", "step": 276, "epoch": 1 }, { "type": "loss", "content": 0.01947295479476452, "timestamp": "2025-09-30 22:13:11.612925", "step": 277, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:11.650933", "step": 277, "epoch": 1 }, { "type": "loss", "content": 0.014910265803337097, "timestamp": "2025-09-30 22:13:11.658440", "step": 278, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:11.695534", "step": 278, "epoch": 1 }, { "type": "loss", "content": 0.019228165969252586, "timestamp": "2025-09-30 22:13:11.706056", "step": 279, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:11.741078", "step": 279, "epoch": 1 }, { "type": "loss", "content": 0.023536305874586105, "timestamp": "2025-09-30 22:13:11.772312", "step": 280, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:11.806752", "step": 280, "epoch": 1 }, { "type": "loss", "content": 0.03552548587322235, "timestamp": "2025-09-30 22:13:11.819860", "step": 281, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:11.851986", "step": 281, "epoch": 1 }, { "type": "loss", "content": 0.013345292769372463, "timestamp": "2025-09-30 22:13:11.862298", "step": 282, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:11.903289", "step": 282, "epoch": 1 }, { "type": "loss", "content": 0.017060376703739166, "timestamp": "2025-09-30 22:13:11.917288", "step": 283, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:11.950807", "step": 283, "epoch": 1 }, { "type": "loss", "content": 0.013612824492156506, "timestamp": "2025-09-30 22:13:11.983886", "step": 284, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:13:12.026228", "step": 284, "epoch": 1 }, { "type": "loss", "content": 0.011212517507374287, "timestamp": "2025-09-30 22:13:12.041328", "step": 285, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:12.084694", "step": 285, "epoch": 1 }, { "type": "loss", "content": 0.019147543236613274, "timestamp": "2025-09-30 22:13:12.100584", "step": 286, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:12.138217", "step": 286, "epoch": 1 }, { "type": "loss", "content": 0.022566957399249077, "timestamp": "2025-09-30 22:13:12.148722", "step": 287, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:12.182427", "step": 287, "epoch": 1 }, { "type": "loss", "content": 0.011118652299046516, "timestamp": "2025-09-30 22:13:12.213659", "step": 288, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:12.246350", "step": 288, "epoch": 1 }, { "type": "loss", "content": 0.026219571009278297, "timestamp": "2025-09-30 22:13:12.255017", "step": 289, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:12.293929", "step": 289, "epoch": 1 }, { "type": "loss", "content": 0.011589091271162033, "timestamp": "2025-09-30 22:13:12.307321", "step": 290, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:12.344614", "step": 290, "epoch": 1 }, { "type": "loss", "content": 0.008985496126115322, "timestamp": "2025-09-30 22:13:12.357941", "step": 291, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:12.392452", "step": 291, "epoch": 1 }, { "type": "loss", "content": 0.014308687299489975, "timestamp": "2025-09-30 22:13:12.425565", "step": 292, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:12.460078", "step": 292, "epoch": 1 }, { "type": "loss", "content": 0.02304118312895298, "timestamp": "2025-09-30 22:13:12.469001", "step": 293, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:12.501653", "step": 293, "epoch": 1 }, { "type": "loss", "content": 0.013603178784251213, "timestamp": "2025-09-30 22:13:12.513907", "step": 294, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:12.547749", "step": 294, "epoch": 1 }, { "type": "loss", "content": 0.022707384079694748, "timestamp": "2025-09-30 22:13:12.554843", "step": 295, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:12.598183", "step": 295, "epoch": 1 }, { "type": "loss", "content": 0.00937237311154604, "timestamp": "2025-09-30 22:13:12.634999", "step": 296, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:12.667321", "step": 296, "epoch": 1 }, { "type": "loss", "content": 0.02236713282763958, "timestamp": "2025-09-30 22:13:12.672148", "step": 297, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:12.713502", "step": 297, "epoch": 1 }, { "type": "loss", "content": 0.017074013128876686, "timestamp": "2025-09-30 22:13:12.720372", "step": 298, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:12.756555", "step": 298, "epoch": 1 }, { "type": "loss", "content": 0.0165159460157156, "timestamp": "2025-09-30 22:13:12.766862", "step": 299, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:12.801445", "step": 299, "epoch": 1 }, { "type": "loss", "content": 0.01714632846415043, "timestamp": "2025-09-30 22:13:12.833438", "step": 300, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:12.874918", "step": 300, "epoch": 1 }, { "type": "loss", "content": 0.012943536043167114, "timestamp": "2025-09-30 22:13:12.888196", "step": 301, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:12.919777", "step": 301, "epoch": 1 }, { "type": "loss", "content": 0.01896374672651291, "timestamp": "2025-09-30 22:13:12.927849", "step": 302, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:12.962782", "step": 302, "epoch": 1 }, { "type": "loss", "content": 0.03681395947933197, "timestamp": "2025-09-30 22:13:12.969874", "step": 303, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:13.002007", "step": 303, "epoch": 1 }, { "type": "loss", "content": 0.010998062789440155, "timestamp": "2025-09-30 22:13:13.034066", "step": 304, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:13.079304", "step": 304, "epoch": 1 }, { "type": "loss", "content": 0.010683653876185417, "timestamp": "2025-09-30 22:13:13.091920", "step": 305, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:13.124037", "step": 305, "epoch": 1 }, { "type": "loss", "content": 0.017140964046120644, "timestamp": "2025-09-30 22:13:13.131979", "step": 306, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:13.175756", "step": 306, "epoch": 1 }, { "type": "loss", "content": 0.011500844731926918, "timestamp": "2025-09-30 22:13:13.183647", "step": 307, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:13.221853", "step": 307, "epoch": 1 }, { "type": "loss", "content": 0.017765764147043228, "timestamp": "2025-09-30 22:13:13.256527", "step": 308, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:13.296566", "step": 308, "epoch": 1 }, { "type": "loss", "content": 0.0074893394485116005, "timestamp": "2025-09-30 22:13:13.311921", "step": 309, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:13:13.354503", "step": 309, "epoch": 1 }, { "type": "loss", "content": 0.011833159253001213, "timestamp": "2025-09-30 22:13:13.371596", "step": 310, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:13.411881", "step": 310, "epoch": 1 }, { "type": "loss", "content": 0.018905291333794594, "timestamp": "2025-09-30 22:13:13.425743", "step": 311, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:13.472333", "step": 311, "epoch": 1 }, { "type": "loss", "content": 0.009745593182742596, "timestamp": "2025-09-30 22:13:13.507202", "step": 312, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:13.545048", "step": 312, "epoch": 1 }, { "type": "loss", "content": 0.025344759225845337, "timestamp": "2025-09-30 22:13:13.552972", "step": 313, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:13.593207", "step": 313, "epoch": 1 }, { "type": "loss", "content": 0.01137094758450985, "timestamp": "2025-09-30 22:13:13.606978", "step": 314, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:13.640901", "step": 314, "epoch": 1 }, { "type": "loss", "content": 0.011146454140543938, "timestamp": "2025-09-30 22:13:13.653310", "step": 315, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:13.690327", "step": 315, "epoch": 1 }, { "type": "loss", "content": 0.0077347299084067345, "timestamp": "2025-09-30 22:13:13.724511", "step": 316, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:13.763431", "step": 316, "epoch": 1 }, { "type": "loss", "content": 0.00844904687255621, "timestamp": "2025-09-30 22:13:13.776837", "step": 317, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:13.818694", "step": 317, "epoch": 1 }, { "type": "loss", "content": 0.010122202336788177, "timestamp": "2025-09-30 22:13:13.832083", "step": 318, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:13.867450", "step": 318, "epoch": 1 }, { "type": "loss", "content": 0.011455384083092213, "timestamp": "2025-09-30 22:13:13.880021", "step": 319, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:13.917215", "step": 319, "epoch": 1 }, { "type": "loss", "content": 0.006118587218225002, "timestamp": "2025-09-30 22:13:13.951870", "step": 320, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:13.987479", "step": 320, "epoch": 1 }, { "type": "loss", "content": 0.022060874849557877, "timestamp": "2025-09-30 22:13:14.003334", "step": 321, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:14.039419", "step": 321, "epoch": 1 }, { "type": "loss", "content": 0.017150264233350754, "timestamp": "2025-09-30 22:13:14.051980", "step": 322, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:14.097350", "step": 322, "epoch": 1 }, { "type": "loss", "content": 0.019098486751317978, "timestamp": "2025-09-30 22:13:14.111403", "step": 323, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:14.144914", "step": 323, "epoch": 1 }, { "type": "loss", "content": 0.024245485663414, "timestamp": "2025-09-30 22:13:14.172890", "step": 324, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:14.215024", "step": 324, "epoch": 1 }, { "type": "loss", "content": 0.009648287668824196, "timestamp": "2025-09-30 22:13:14.219837", "step": 325, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:14.253632", "step": 325, "epoch": 1 }, { "type": "loss", "content": 0.014924516901373863, "timestamp": "2025-09-30 22:13:14.260888", "step": 326, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:14.307634", "step": 326, "epoch": 1 }, { "type": "loss", "content": 0.01756501942873001, "timestamp": "2025-09-30 22:13:14.320225", "step": 327, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:13:14.363150", "step": 327, "epoch": 1 }, { "type": "loss", "content": 0.03167622908949852, "timestamp": "2025-09-30 22:13:14.391898", "step": 328, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:14.427759", "step": 328, "epoch": 1 }, { "type": "loss", "content": 0.005993070546537638, "timestamp": "2025-09-30 22:13:14.440870", "step": 329, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:14.477194", "step": 329, "epoch": 1 }, { "type": "loss", "content": 0.017649002373218536, "timestamp": "2025-09-30 22:13:14.487505", "step": 330, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:14.532018", "step": 330, "epoch": 1 }, { "type": "loss", "content": 0.008263523690402508, "timestamp": "2025-09-30 22:13:14.545874", "step": 331, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:13:14.596560", "step": 331, "epoch": 1 }, { "type": "loss", "content": 0.011624597012996674, "timestamp": "2025-09-30 22:13:14.633119", "step": 332, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:14.672852", "step": 332, "epoch": 1 }, { "type": "loss", "content": 0.01413639448583126, "timestamp": "2025-09-30 22:13:14.685908", "step": 333, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:14.724927", "step": 333, "epoch": 1 }, { "type": "loss", "content": 0.014897001907229424, "timestamp": "2025-09-30 22:13:14.738311", "step": 334, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:14.786619", "step": 334, "epoch": 1 }, { "type": "loss", "content": 0.006835165433585644, "timestamp": "2025-09-30 22:13:14.800639", "step": 335, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:14.837125", "step": 335, "epoch": 1 }, { "type": "loss", "content": 0.023027604445815086, "timestamp": "2025-09-30 22:13:14.871449", "step": 336, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:14.907564", "step": 336, "epoch": 1 }, { "type": "loss", "content": 0.0157425869256258, "timestamp": "2025-09-30 22:13:14.916375", "step": 337, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:14.958049", "step": 337, "epoch": 1 }, { "type": "loss", "content": 0.012969511561095715, "timestamp": "2025-09-30 22:13:14.971367", "step": 338, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:15.010020", "step": 338, "epoch": 1 }, { "type": "loss", "content": 0.011469487100839615, "timestamp": "2025-09-30 22:13:15.017177", "step": 339, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:15.052390", "step": 339, "epoch": 1 }, { "type": "loss", "content": 0.029641326516866684, "timestamp": "2025-09-30 22:13:15.084751", "step": 340, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:15.124137", "step": 340, "epoch": 1 }, { "type": "loss", "content": 0.08696480840444565, "timestamp": "2025-09-30 22:13:15.128974", "step": 341, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:15.162128", "step": 341, "epoch": 1 }, { "type": "loss", "content": 0.019450241699814796, "timestamp": "2025-09-30 22:13:15.173369", "step": 342, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:15.212323", "step": 342, "epoch": 1 }, { "type": "loss", "content": 0.041848428547382355, "timestamp": "2025-09-30 22:13:15.219444", "step": 343, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:15.259029", "step": 343, "epoch": 1 }, { "type": "loss", "content": 0.01888955384492874, "timestamp": "2025-09-30 22:13:15.290364", "step": 344, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:15.323047", "step": 344, "epoch": 1 }, { "type": "loss", "content": 0.02714778482913971, "timestamp": "2025-09-30 22:13:15.328488", "step": 345, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:13:17.736554", "step": 345, "epoch": 1 }, { "type": "pplx", "content": 5.524948511337376, "timestamp": "2025-09-30 22:13:17.746433", "step": 345, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:17.780544", "step": 345, "epoch": 1 }, { "type": "loss", "content": 0.025112979114055634, "timestamp": "2025-09-30 22:13:17.787437", "step": 346, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:17.827523", "step": 346, "epoch": 1 }, { "type": "loss", "content": 0.019669389352202415, "timestamp": "2025-09-30 22:13:17.835106", "step": 347, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:17.882114", "step": 347, "epoch": 1 }, { "type": "loss", "content": 0.015546857379376888, "timestamp": "2025-09-30 22:13:17.913962", "step": 348, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:17.948929", "step": 348, "epoch": 1 }, { "type": "loss", "content": 0.01003229059278965, "timestamp": "2025-09-30 22:13:17.962101", "step": 349, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:17.995610", "step": 349, "epoch": 1 }, { "type": "loss", "content": 0.016755785793066025, "timestamp": "2025-09-30 22:13:18.006773", "step": 350, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:18.046062", "step": 350, "epoch": 1 }, { "type": "loss", "content": 0.010388433001935482, "timestamp": "2025-09-30 22:13:18.057205", "step": 351, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:18.093200", "step": 351, "epoch": 1 }, { "type": "loss", "content": 0.024582738056778908, "timestamp": "2025-09-30 22:13:18.127540", "step": 352, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:18.169371", "step": 352, "epoch": 1 }, { "type": "loss", "content": 0.0186713095754385, "timestamp": "2025-09-30 22:13:18.182038", "step": 353, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:18.224574", "step": 353, "epoch": 1 }, { "type": "loss", "content": 0.009301440790295601, "timestamp": "2025-09-30 22:13:18.238338", "step": 354, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:18.281817", "step": 354, "epoch": 1 }, { "type": "loss", "content": 0.01219329982995987, "timestamp": "2025-09-30 22:13:18.295890", "step": 355, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:18.332829", "step": 355, "epoch": 1 }, { "type": "loss", "content": 0.019256679341197014, "timestamp": "2025-09-30 22:13:18.367100", "step": 356, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:18.409278", "step": 356, "epoch": 1 }, { "type": "loss", "content": 0.01657373271882534, "timestamp": "2025-09-30 22:13:18.419278", "step": 357, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:18.465172", "step": 357, "epoch": 1 }, { "type": "loss", "content": 0.013607809320092201, "timestamp": "2025-09-30 22:13:18.477640", "step": 358, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:18.512777", "step": 358, "epoch": 1 }, { "type": "loss", "content": 0.025948015972971916, "timestamp": "2025-09-30 22:13:18.525648", "step": 359, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:18.569299", "step": 359, "epoch": 1 }, { "type": "loss", "content": 0.017592886462807655, "timestamp": "2025-09-30 22:13:18.605981", "step": 360, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:18.650331", "step": 360, "epoch": 1 }, { "type": "loss", "content": 0.013935086317360401, "timestamp": "2025-09-30 22:13:18.663472", "step": 361, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:18.700824", "step": 361, "epoch": 1 }, { "type": "loss", "content": 0.013167818076908588, "timestamp": "2025-09-30 22:13:18.708808", "step": 362, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:18.749408", "step": 362, "epoch": 1 }, { "type": "loss", "content": 0.014511304907500744, "timestamp": "2025-09-30 22:13:18.765314", "step": 363, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:13:18.817733", "step": 363, "epoch": 1 }, { "type": "loss", "content": 0.012271334417164326, "timestamp": "2025-09-30 22:13:18.855926", "step": 364, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:18.888882", "step": 364, "epoch": 1 }, { "type": "loss", "content": 0.024161912500858307, "timestamp": "2025-09-30 22:13:18.898943", "step": 365, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:18.933977", "step": 365, "epoch": 1 }, { "type": "loss", "content": 0.0163529422134161, "timestamp": "2025-09-30 22:13:18.946607", "step": 366, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:18.986572", "step": 366, "epoch": 1 }, { "type": "loss", "content": 0.015899039804935455, "timestamp": "2025-09-30 22:13:18.998613", "step": 367, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:19.034032", "step": 367, "epoch": 1 }, { "type": "loss", "content": 0.013269470073282719, "timestamp": "2025-09-30 22:13:19.066896", "step": 368, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:19.116795", "step": 368, "epoch": 1 }, { "type": "loss", "content": 0.014671982266008854, "timestamp": "2025-09-30 22:13:19.126244", "step": 369, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:19.164760", "step": 369, "epoch": 1 }, { "type": "loss", "content": 0.017914393916726112, "timestamp": "2025-09-30 22:13:19.172440", "step": 370, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:19.208243", "step": 370, "epoch": 1 }, { "type": "loss", "content": 0.013160360977053642, "timestamp": "2025-09-30 22:13:19.219289", "step": 371, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:19.261896", "step": 371, "epoch": 1 }, { "type": "loss", "content": 0.019966568797826767, "timestamp": "2025-09-30 22:13:19.292980", "step": 372, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:19.329699", "step": 372, "epoch": 1 }, { "type": "loss", "content": 0.017061376944184303, "timestamp": "2025-09-30 22:13:19.343672", "step": 373, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:19.386505", "step": 373, "epoch": 1 }, { "type": "loss", "content": 0.013974986970424652, "timestamp": "2025-09-30 22:13:19.399188", "step": 374, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:19.439080", "step": 374, "epoch": 1 }, { "type": "loss", "content": 0.010096900165081024, "timestamp": "2025-09-30 22:13:19.452924", "step": 375, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:19.485798", "step": 375, "epoch": 1 }, { "type": "loss", "content": 0.020089181140065193, "timestamp": "2025-09-30 22:13:19.514617", "step": 376, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:19.547293", "step": 376, "epoch": 1 }, { "type": "loss", "content": 0.016456788405776024, "timestamp": "2025-09-30 22:13:19.552726", "step": 377, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:19.587030", "step": 377, "epoch": 1 }, { "type": "loss", "content": 0.018948769196867943, "timestamp": "2025-09-30 22:13:19.599551", "step": 378, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:19.633793", "step": 378, "epoch": 1 }, { "type": "loss", "content": 0.034312453120946884, "timestamp": "2025-09-30 22:13:19.641266", "step": 379, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:19.679663", "step": 379, "epoch": 1 }, { "type": "loss", "content": 0.02766266278922558, "timestamp": "2025-09-30 22:13:19.712814", "step": 380, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:19.745727", "step": 380, "epoch": 1 }, { "type": "loss", "content": 0.01396411843597889, "timestamp": "2025-09-30 22:13:19.753857", "step": 381, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:19.789120", "step": 381, "epoch": 1 }, { "type": "loss", "content": 0.02729835733771324, "timestamp": "2025-09-30 22:13:19.801393", "step": 382, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:19.837563", "step": 382, "epoch": 1 }, { "type": "loss", "content": 0.01805323176085949, "timestamp": "2025-09-30 22:13:19.850047", "step": 383, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:19.890838", "step": 383, "epoch": 1 }, { "type": "loss", "content": 0.00845720712095499, "timestamp": "2025-09-30 22:13:19.925097", "step": 384, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:19.958952", "step": 384, "epoch": 1 }, { "type": "loss", "content": 0.010211989283561707, "timestamp": "2025-09-30 22:13:19.969208", "step": 385, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:20.005894", "step": 385, "epoch": 1 }, { "type": "loss", "content": 0.0188337080180645, "timestamp": "2025-09-30 22:13:20.013679", "step": 386, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:20.051634", "step": 386, "epoch": 1 }, { "type": "loss", "content": 0.014987856149673462, "timestamp": "2025-09-30 22:13:20.065273", "step": 387, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:20.106865", "step": 387, "epoch": 1 }, { "type": "loss", "content": 0.018355386331677437, "timestamp": "2025-09-30 22:13:20.136337", "step": 388, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:20.172331", "step": 388, "epoch": 1 }, { "type": "loss", "content": 0.01463927049189806, "timestamp": "2025-09-30 22:13:20.177792", "step": 389, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:20.212534", "step": 389, "epoch": 1 }, { "type": "loss", "content": 0.034404098987579346, "timestamp": "2025-09-30 22:13:20.224814", "step": 390, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:20.260739", "step": 390, "epoch": 1 }, { "type": "loss", "content": 0.01674843579530716, "timestamp": "2025-09-30 22:13:20.270988", "step": 391, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:20.319174", "step": 391, "epoch": 1 }, { "type": "loss", "content": 0.011764826253056526, "timestamp": "2025-09-30 22:13:20.352559", "step": 392, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:20.401462", "step": 392, "epoch": 1 }, { "type": "loss", "content": 0.01702144928276539, "timestamp": "2025-09-30 22:13:20.414067", "step": 393, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:20.452853", "step": 393, "epoch": 1 }, { "type": "loss", "content": 0.01283255685120821, "timestamp": "2025-09-30 22:13:20.465402", "step": 394, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:20.502923", "step": 394, "epoch": 1 }, { "type": "loss", "content": 0.027312854304909706, "timestamp": "2025-09-30 22:13:20.510933", "step": 395, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:20.553925", "step": 395, "epoch": 1 }, { "type": "loss", "content": 0.015591930598020554, "timestamp": "2025-09-30 22:13:20.588589", "step": 396, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:20.630990", "step": 396, "epoch": 1 }, { "type": "loss", "content": 0.02703159861266613, "timestamp": "2025-09-30 22:13:20.640864", "step": 397, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:20.682170", "step": 397, "epoch": 1 }, { "type": "loss", "content": 0.015556761994957924, "timestamp": "2025-09-30 22:13:20.695582", "step": 398, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:20.736559", "step": 398, "epoch": 1 }, { "type": "loss", "content": 0.009088275954127312, "timestamp": "2025-09-30 22:13:20.750502", "step": 399, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:20.789467", "step": 399, "epoch": 1 }, { "type": "loss", "content": 0.018833929672837257, "timestamp": "2025-09-30 22:13:20.822884", "step": 400, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:20.861469", "step": 400, "epoch": 1 }, { "type": "loss", "content": 0.016304094344377518, "timestamp": "2025-09-30 22:13:20.873858", "step": 401, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:20.910605", "step": 401, "epoch": 1 }, { "type": "loss", "content": 0.019747715443372726, "timestamp": "2025-09-30 22:13:20.921026", "step": 402, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:20.955389", "step": 402, "epoch": 1 }, { "type": "loss", "content": 0.016585690900683403, "timestamp": "2025-09-30 22:13:20.962508", "step": 403, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:20.997342", "step": 403, "epoch": 1 }, { "type": "loss", "content": 0.01574224978685379, "timestamp": "2025-09-30 22:13:21.025193", "step": 404, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:21.058273", "step": 404, "epoch": 1 }, { "type": "loss", "content": 0.023047136142849922, "timestamp": "2025-09-30 22:13:21.063833", "step": 405, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:21.107626", "step": 405, "epoch": 1 }, { "type": "loss", "content": 0.017483817413449287, "timestamp": "2025-09-30 22:13:21.115650", "step": 406, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:13:21.149237", "step": 406, "epoch": 1 }, { "type": "loss", "content": 0.02771250531077385, "timestamp": "2025-09-30 22:13:21.153427", "step": 407, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:21.197526", "step": 407, "epoch": 1 }, { "type": "loss", "content": 0.0231977179646492, "timestamp": "2025-09-30 22:13:21.225260", "step": 408, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:13:21.285570", "step": 408, "epoch": 1 }, { "type": "loss", "content": 0.009227665141224861, "timestamp": "2025-09-30 22:13:21.302906", "step": 409, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:21.349342", "step": 409, "epoch": 1 }, { "type": "loss", "content": 0.013030946254730225, "timestamp": "2025-09-30 22:13:21.363059", "step": 410, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:13:21.406770", "step": 410, "epoch": 1 }, { "type": "loss", "content": 0.030049873515963554, "timestamp": "2025-09-30 22:13:21.411245", "step": 411, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:21.444924", "step": 411, "epoch": 1 }, { "type": "loss", "content": 0.008911799639463425, "timestamp": "2025-09-30 22:13:21.476707", "step": 412, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:21.510565", "step": 412, "epoch": 1 }, { "type": "loss", "content": 0.023440878838300705, "timestamp": "2025-09-30 22:13:21.515709", "step": 413, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:21.554636", "step": 413, "epoch": 1 }, { "type": "loss", "content": 0.02695058099925518, "timestamp": "2025-09-30 22:13:21.562464", "step": 414, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-30 22:13:21.611009", "step": 414, "epoch": 1 }, { "type": "loss", "content": 0.007888389751315117, "timestamp": "2025-09-30 22:13:21.628612", "step": 415, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:21.662425", "step": 415, "epoch": 1 }, { "type": "loss", "content": 0.016789231449365616, "timestamp": "2025-09-30 22:13:21.690813", "step": 416, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:21.724247", "step": 416, "epoch": 1 }, { "type": "loss", "content": 0.009886451996862888, "timestamp": "2025-09-30 22:13:21.729846", "step": 417, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:21.762339", "step": 417, "epoch": 1 }, { "type": "loss", "content": 0.017675846815109253, "timestamp": "2025-09-30 22:13:21.770244", "step": 418, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:21.809806", "step": 418, "epoch": 1 }, { "type": "loss", "content": 0.013344887644052505, "timestamp": "2025-09-30 22:13:21.817771", "step": 419, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:21.861219", "step": 419, "epoch": 1 }, { "type": "loss", "content": 0.013717987574636936, "timestamp": "2025-09-30 22:13:21.895778", "step": 420, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:21.939313", "step": 420, "epoch": 1 }, { "type": "loss", "content": 0.025773746892809868, "timestamp": "2025-09-30 22:13:21.944537", "step": 421, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:21.984423", "step": 421, "epoch": 1 }, { "type": "loss", "content": 0.007907262071967125, "timestamp": "2025-09-30 22:13:21.998102", "step": 422, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:22.035292", "step": 422, "epoch": 1 }, { "type": "loss", "content": 0.013128413818776608, "timestamp": "2025-09-30 22:13:22.048607", "step": 423, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:22.085719", "step": 423, "epoch": 1 }, { "type": "loss", "content": 0.01196190994232893, "timestamp": "2025-09-30 22:13:22.120267", "step": 424, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:13:22.165576", "step": 424, "epoch": 1 }, { "type": "loss", "content": 0.008793395012617111, "timestamp": "2025-09-30 22:13:22.182246", "step": 425, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:22.221255", "step": 425, "epoch": 1 }, { "type": "loss", "content": 0.021444421261548996, "timestamp": "2025-09-30 22:13:22.231554", "step": 426, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:22.266069", "step": 426, "epoch": 1 }, { "type": "loss", "content": 0.03694969043135643, "timestamp": "2025-09-30 22:13:22.273910", "step": 427, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:22.307561", "step": 427, "epoch": 1 }, { "type": "loss", "content": 0.019183486700057983, "timestamp": "2025-09-30 22:13:22.339317", "step": 428, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:22.377199", "step": 428, "epoch": 1 }, { "type": "loss", "content": 0.019474482163786888, "timestamp": "2025-09-30 22:13:22.382384", "step": 429, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:22.429583", "step": 429, "epoch": 1 }, { "type": "loss", "content": 0.009424322284758091, "timestamp": "2025-09-30 22:13:22.445400", "step": 430, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:22.482833", "step": 430, "epoch": 1 }, { "type": "loss", "content": 0.011564414016902447, "timestamp": "2025-09-30 22:13:22.496826", "step": 431, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:22.536004", "step": 431, "epoch": 1 }, { "type": "loss", "content": 0.018689239397644997, "timestamp": "2025-09-30 22:13:22.570532", "step": 432, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:22.609955", "step": 432, "epoch": 1 }, { "type": "loss", "content": 0.016993923112750053, "timestamp": "2025-09-30 22:13:22.618518", "step": 433, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:13:22.651414", "step": 433, "epoch": 1 }, { "type": "loss", "content": 0.025945935398340225, "timestamp": "2025-09-30 22:13:22.655890", "step": 434, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:22.691394", "step": 434, "epoch": 1 }, { "type": "loss", "content": 0.015565545298159122, "timestamp": "2025-09-30 22:13:22.703738", "step": 435, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:22.738785", "step": 435, "epoch": 1 }, { "type": "loss", "content": 0.015718184411525726, "timestamp": "2025-09-30 22:13:22.770729", "step": 436, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:22.809199", "step": 436, "epoch": 1 }, { "type": "loss", "content": 0.019509142264723778, "timestamp": "2025-09-30 22:13:22.821851", "step": 437, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:22.855519", "step": 437, "epoch": 1 }, { "type": "loss", "content": 0.014073725789785385, "timestamp": "2025-09-30 22:13:22.865411", "step": 438, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:22.908783", "step": 438, "epoch": 1 }, { "type": "loss", "content": 0.02983592450618744, "timestamp": "2025-09-30 22:13:22.919793", "step": 439, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:22.961432", "step": 439, "epoch": 1 }, { "type": "loss", "content": 0.010194335132837296, "timestamp": "2025-09-30 22:13:22.995571", "step": 440, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:23.031184", "step": 440, "epoch": 1 }, { "type": "loss", "content": 0.014181242324411869, "timestamp": "2025-09-30 22:13:23.041268", "step": 441, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:23.083944", "step": 441, "epoch": 1 }, { "type": "loss", "content": 0.023306261748075485, "timestamp": "2025-09-30 22:13:23.091842", "step": 442, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:23.131270", "step": 442, "epoch": 1 }, { "type": "loss", "content": 0.016305014491081238, "timestamp": "2025-09-30 22:13:23.143700", "step": 443, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:23.181474", "step": 443, "epoch": 1 }, { "type": "loss", "content": 0.013568984344601631, "timestamp": "2025-09-30 22:13:23.216185", "step": 444, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:23.255134", "step": 444, "epoch": 1 }, { "type": "loss", "content": 0.006708019413053989, "timestamp": "2025-09-30 22:13:23.268330", "step": 445, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:23.305726", "step": 445, "epoch": 1 }, { "type": "loss", "content": 0.012450787238776684, "timestamp": "2025-09-30 22:13:23.319507", "step": 446, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:13:23.364332", "step": 446, "epoch": 1 }, { "type": "loss", "content": 0.008453257381916046, "timestamp": "2025-09-30 22:13:23.380649", "step": 447, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:23.418274", "step": 447, "epoch": 1 }, { "type": "loss", "content": 0.011293847113847733, "timestamp": "2025-09-30 22:13:23.451621", "step": 448, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:23.490542", "step": 448, "epoch": 1 }, { "type": "loss", "content": 0.012872311286628246, "timestamp": "2025-09-30 22:13:23.503170", "step": 449, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:23.547079", "step": 449, "epoch": 1 }, { "type": "loss", "content": 0.008859308436512947, "timestamp": "2025-09-30 22:13:23.562927", "step": 450, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:23.599425", "step": 450, "epoch": 1 }, { "type": "loss", "content": 0.021126611158251762, "timestamp": "2025-09-30 22:13:23.609824", "step": 451, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:23.642304", "step": 451, "epoch": 1 }, { "type": "loss", "content": 0.00786950346082449, "timestamp": "2025-09-30 22:13:23.674344", "step": 452, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:23.712524", "step": 452, "epoch": 1 }, { "type": "loss", "content": 0.016248704865574837, "timestamp": "2025-09-30 22:13:23.720653", "step": 453, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:23.759400", "step": 453, "epoch": 1 }, { "type": "loss", "content": 0.02498588338494301, "timestamp": "2025-09-30 22:13:23.772762", "step": 454, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:23.822089", "step": 454, "epoch": 1 }, { "type": "loss", "content": 0.010999486781656742, "timestamp": "2025-09-30 22:13:23.836108", "step": 455, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:23.869514", "step": 455, "epoch": 1 }, { "type": "loss", "content": 0.015023881569504738, "timestamp": "2025-09-30 22:13:23.902942", "step": 456, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:23.937911", "step": 456, "epoch": 1 }, { "type": "loss", "content": 0.009870980866253376, "timestamp": "2025-09-30 22:13:23.947715", "step": 457, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:23.983566", "step": 457, "epoch": 1 }, { "type": "loss", "content": 0.01719001494348049, "timestamp": "2025-09-30 22:13:23.996153", "step": 458, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:24.032498", "step": 458, "epoch": 1 }, { "type": "loss", "content": 0.01666310243308544, "timestamp": "2025-09-30 22:13:24.040496", "step": 459, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:13:24.088663", "step": 459, "epoch": 1 }, { "type": "loss", "content": 0.01084128674119711, "timestamp": "2025-09-30 22:13:24.125130", "step": 460, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:13:26.609779", "step": 460, "epoch": 1 }, { "type": "pplx", "content": 5.547471103472087, "timestamp": "2025-09-30 22:13:26.611919", "step": 460, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:26.651656", "step": 460, "epoch": 1 }, { "type": "loss", "content": 0.010452203452587128, "timestamp": "2025-09-30 22:13:26.667004", "step": 461, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:26.704799", "step": 461, "epoch": 1 }, { "type": "loss", "content": 0.011252378113567829, "timestamp": "2025-09-30 22:13:26.718602", "step": 462, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:26.759757", "step": 462, "epoch": 1 }, { "type": "loss", "content": 0.02305806800723076, "timestamp": "2025-09-30 22:13:26.772321", "step": 463, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:26.811072", "step": 463, "epoch": 1 }, { "type": "loss", "content": 0.009146085008978844, "timestamp": "2025-09-30 22:13:26.845946", "step": 464, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:26.881541", "step": 464, "epoch": 1 }, { "type": "loss", "content": 0.025301601737737656, "timestamp": "2025-09-30 22:13:26.886529", "step": 465, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:26.924240", "step": 465, "epoch": 1 }, { "type": "loss", "content": 0.01246948167681694, "timestamp": "2025-09-30 22:13:26.937990", "step": 466, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:26.977081", "step": 466, "epoch": 1 }, { "type": "loss", "content": 0.03232751041650772, "timestamp": "2025-09-30 22:13:26.985132", "step": 467, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:27.018357", "step": 467, "epoch": 1 }, { "type": "loss", "content": 0.025166211649775505, "timestamp": "2025-09-30 22:13:27.047916", "step": 468, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:13:27.089609", "step": 468, "epoch": 1 }, { "type": "loss", "content": 0.008584776893258095, "timestamp": "2025-09-30 22:13:27.106621", "step": 469, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:27.153880", "step": 469, "epoch": 1 }, { "type": "loss", "content": 0.02005261369049549, "timestamp": "2025-09-30 22:13:27.161878", "step": 470, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:27.194883", "step": 470, "epoch": 1 }, { "type": "loss", "content": 0.018570082262158394, "timestamp": "2025-09-30 22:13:27.202310", "step": 471, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:27.238764", "step": 471, "epoch": 1 }, { "type": "loss", "content": 0.01865404099225998, "timestamp": "2025-09-30 22:13:27.269970", "step": 472, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:27.311535", "step": 472, "epoch": 1 }, { "type": "loss", "content": 0.019251475110650063, "timestamp": "2025-09-30 22:13:27.322124", "step": 473, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:13:27.372295", "step": 473, "epoch": 1 }, { "type": "loss", "content": 0.009503374807536602, "timestamp": "2025-09-30 22:13:27.388484", "step": 474, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:27.420591", "step": 474, "epoch": 1 }, { "type": "loss", "content": 0.015804406255483627, "timestamp": "2025-09-30 22:13:27.432758", "step": 475, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:27.472484", "step": 475, "epoch": 1 }, { "type": "loss", "content": 0.01220368966460228, "timestamp": "2025-09-30 22:13:27.505498", "step": 476, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:27.543287", "step": 476, "epoch": 1 }, { "type": "loss", "content": 0.026538586243987083, "timestamp": "2025-09-30 22:13:27.551290", "step": 477, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:27.586846", "step": 477, "epoch": 1 }, { "type": "loss", "content": 0.015999168157577515, "timestamp": "2025-09-30 22:13:27.594757", "step": 478, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:27.631937", "step": 478, "epoch": 1 }, { "type": "loss", "content": 0.021727586165070534, "timestamp": "2025-09-30 22:13:27.639478", "step": 479, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:27.675526", "step": 479, "epoch": 1 }, { "type": "loss", "content": 0.008494660258293152, "timestamp": "2025-09-30 22:13:27.708650", "step": 480, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:27.748775", "step": 480, "epoch": 1 }, { "type": "loss", "content": 0.006171220447868109, "timestamp": "2025-09-30 22:13:27.761452", "step": 481, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:27.795638", "step": 481, "epoch": 1 }, { "type": "loss", "content": 0.016583433374762535, "timestamp": "2025-09-30 22:13:27.808208", "step": 482, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:27.850669", "step": 482, "epoch": 1 }, { "type": "loss", "content": 0.01650671847164631, "timestamp": "2025-09-30 22:13:27.864345", "step": 483, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:27.903833", "step": 483, "epoch": 1 }, { "type": "loss", "content": 0.012906375341117382, "timestamp": "2025-09-30 22:13:27.935343", "step": 484, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:27.970074", "step": 484, "epoch": 1 }, { "type": "loss", "content": 0.019743014127016068, "timestamp": "2025-09-30 22:13:27.978350", "step": 485, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:28.013347", "step": 485, "epoch": 1 }, { "type": "loss", "content": 0.027920933440327644, "timestamp": "2025-09-30 22:13:28.024083", "step": 486, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:28.064736", "step": 486, "epoch": 1 }, { "type": "loss", "content": 0.01832328550517559, "timestamp": "2025-09-30 22:13:28.074965", "step": 487, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:13:28.115912", "step": 487, "epoch": 1 }, { "type": "loss", "content": 0.012525717727839947, "timestamp": "2025-09-30 22:13:28.152412", "step": 488, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:28.189429", "step": 488, "epoch": 1 }, { "type": "loss", "content": 0.019297080114483833, "timestamp": "2025-09-30 22:13:28.202034", "step": 489, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:28.245096", "step": 489, "epoch": 1 }, { "type": "loss", "content": 0.011257869191467762, "timestamp": "2025-09-30 22:13:28.258863", "step": 490, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:28.299787", "step": 490, "epoch": 1 }, { "type": "loss", "content": 0.022702723741531372, "timestamp": "2025-09-30 22:13:28.307924", "step": 491, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:28.342762", "step": 491, "epoch": 1 }, { "type": "loss", "content": 0.014692867174744606, "timestamp": "2025-09-30 22:13:28.373905", "step": 492, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:28.413206", "step": 492, "epoch": 1 }, { "type": "loss", "content": 0.012005449272692204, "timestamp": "2025-09-30 22:13:28.423746", "step": 493, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:28.461744", "step": 493, "epoch": 1 }, { "type": "loss", "content": 0.012011608108878136, "timestamp": "2025-09-30 22:13:28.475488", "step": 494, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:28.517536", "step": 494, "epoch": 1 }, { "type": "loss", "content": 0.006444706581532955, "timestamp": "2025-09-30 22:13:28.531238", "step": 495, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:28.570258", "step": 495, "epoch": 1 }, { "type": "loss", "content": 0.007369877304881811, "timestamp": "2025-09-30 22:13:28.604786", "step": 496, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:28.641550", "step": 496, "epoch": 1 }, { "type": "loss", "content": 0.010467078536748886, "timestamp": "2025-09-30 22:13:28.654717", "step": 497, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:28.687517", "step": 497, "epoch": 1 }, { "type": "loss", "content": 0.015244974754750729, "timestamp": "2025-09-30 22:13:28.699521", "step": 498, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:28.732464", "step": 498, "epoch": 1 }, { "type": "loss", "content": 0.01717539317905903, "timestamp": "2025-09-30 22:13:28.740334", "step": 499, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:28.783711", "step": 499, "epoch": 1 }, { "type": "loss", "content": 0.013126603327691555, "timestamp": "2025-09-30 22:13:28.816884", "step": 500, "epoch": 1 }, { "type": "info", "content": "Checkpoint saved at step 500", "timestamp": "2025-09-30 22:13:34.118711", "step": 500, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:34.157718", "step": 500, "epoch": 1 }, { "type": "loss", "content": 0.015919821336865425, "timestamp": "2025-09-30 22:13:34.166491", "step": 501, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:34.205992", "step": 501, "epoch": 1 }, { "type": "loss", "content": 0.019572250545024872, "timestamp": "2025-09-30 22:13:34.213856", "step": 502, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:13:34.255280", "step": 502, "epoch": 1 }, { "type": "loss", "content": 0.009043821133673191, "timestamp": "2025-09-30 22:13:34.270837", "step": 503, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:34.308537", "step": 503, "epoch": 1 }, { "type": "loss", "content": 0.016094347462058067, "timestamp": "2025-09-30 22:13:34.337220", "step": 504, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:34.373861", "step": 504, "epoch": 1 }, { "type": "loss", "content": 0.014503546059131622, "timestamp": "2025-09-30 22:13:34.386596", "step": 505, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:34.419290", "step": 505, "epoch": 1 }, { "type": "loss", "content": 0.0143441092222929, "timestamp": "2025-09-30 22:13:34.430355", "step": 506, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:34.471809", "step": 506, "epoch": 1 }, { "type": "loss", "content": 0.009784374386072159, "timestamp": "2025-09-30 22:13:34.485595", "step": 507, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:34.521390", "step": 507, "epoch": 1 }, { "type": "loss", "content": 0.018428310751914978, "timestamp": "2025-09-30 22:13:34.555645", "step": 508, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:34.594874", "step": 508, "epoch": 1 }, { "type": "loss", "content": 0.01567312888801098, "timestamp": "2025-09-30 22:13:34.605266", "step": 509, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:34.643025", "step": 509, "epoch": 1 }, { "type": "loss", "content": 0.015023056417703629, "timestamp": "2025-09-30 22:13:34.655362", "step": 510, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:34.699084", "step": 510, "epoch": 1 }, { "type": "loss", "content": 0.010495069436728954, "timestamp": "2025-09-30 22:13:34.715011", "step": 511, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:34.754164", "step": 511, "epoch": 1 }, { "type": "loss", "content": 0.010150685906410217, "timestamp": "2025-09-30 22:13:34.789030", "step": 512, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:34.825356", "step": 512, "epoch": 1 }, { "type": "loss", "content": 0.019635174423456192, "timestamp": "2025-09-30 22:13:34.833928", "step": 513, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:34.876992", "step": 513, "epoch": 1 }, { "type": "loss", "content": 0.010541053488850594, "timestamp": "2025-09-30 22:13:34.890729", "step": 514, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:34.925435", "step": 514, "epoch": 1 }, { "type": "loss", "content": 0.019847339019179344, "timestamp": "2025-09-30 22:13:34.937586", "step": 515, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:34.971644", "step": 515, "epoch": 1 }, { "type": "loss", "content": 0.019181231036782265, "timestamp": "2025-09-30 22:13:35.003437", "step": 516, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:35.038271", "step": 516, "epoch": 1 }, { "type": "loss", "content": 0.01539756078273058, "timestamp": "2025-09-30 22:13:35.043192", "step": 517, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:35.080332", "step": 517, "epoch": 1 }, { "type": "loss", "content": 0.01914926804602146, "timestamp": "2025-09-30 22:13:35.090603", "step": 518, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:35.129685", "step": 518, "epoch": 1 }, { "type": "loss", "content": 0.008599474094808102, "timestamp": "2025-09-30 22:13:35.140777", "step": 519, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:35.183037", "step": 519, "epoch": 1 }, { "type": "loss", "content": 0.011888724751770496, "timestamp": "2025-09-30 22:13:35.211061", "step": 520, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:35.245377", "step": 520, "epoch": 1 }, { "type": "loss", "content": 0.012584488838911057, "timestamp": "2025-09-30 22:13:35.251150", "step": 521, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:35.283671", "step": 521, "epoch": 1 }, { "type": "loss", "content": 0.01680966280400753, "timestamp": "2025-09-30 22:13:35.290988", "step": 522, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:35.326768", "step": 522, "epoch": 1 }, { "type": "loss", "content": 0.014662979170680046, "timestamp": "2025-09-30 22:13:35.340535", "step": 523, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:35.372444", "step": 523, "epoch": 1 }, { "type": "loss", "content": 0.008960328064858913, "timestamp": "2025-09-30 22:13:35.401915", "step": 524, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:35.444449", "step": 524, "epoch": 1 }, { "type": "loss", "content": 0.008551303297281265, "timestamp": "2025-09-30 22:13:35.457739", "step": 525, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:35.497904", "step": 525, "epoch": 1 }, { "type": "loss", "content": 0.012693502940237522, "timestamp": "2025-09-30 22:13:35.510516", "step": 526, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:35.548323", "step": 526, "epoch": 1 }, { "type": "loss", "content": 0.014062950387597084, "timestamp": "2025-09-30 22:13:35.556234", "step": 527, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:35.597961", "step": 527, "epoch": 1 }, { "type": "loss", "content": 0.02073533460497856, "timestamp": "2025-09-30 22:13:35.631141", "step": 528, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:35.675027", "step": 528, "epoch": 1 }, { "type": "loss", "content": 0.015901118516921997, "timestamp": "2025-09-30 22:13:35.688306", "step": 529, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:35.727620", "step": 529, "epoch": 1 }, { "type": "loss", "content": 0.008050153031945229, "timestamp": "2025-09-30 22:13:35.740244", "step": 530, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:35.778005", "step": 530, "epoch": 1 }, { "type": "loss", "content": 0.005131041631102562, "timestamp": "2025-09-30 22:13:35.791791", "step": 531, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:35.824717", "step": 531, "epoch": 1 }, { "type": "loss", "content": 0.03233374282717705, "timestamp": "2025-09-30 22:13:35.852577", "step": 532, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:35.899146", "step": 532, "epoch": 1 }, { "type": "loss", "content": 0.04059325531125069, "timestamp": "2025-09-30 22:13:35.903851", "step": 533, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:35.939407", "step": 533, "epoch": 1 }, { "type": "loss", "content": 0.01124960370361805, "timestamp": "2025-09-30 22:13:35.947345", "step": 534, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:35.986463", "step": 534, "epoch": 1 }, { "type": "loss", "content": 0.010681557469069958, "timestamp": "2025-09-30 22:13:35.999831", "step": 535, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:36.035706", "step": 535, "epoch": 1 }, { "type": "loss", "content": 0.014229071326553822, "timestamp": "2025-09-30 22:13:36.063884", "step": 536, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:36.101348", "step": 536, "epoch": 1 }, { "type": "loss", "content": 0.024254217743873596, "timestamp": "2025-09-30 22:13:36.106299", "step": 537, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:36.146100", "step": 537, "epoch": 1 }, { "type": "loss", "content": 0.015680156648159027, "timestamp": "2025-09-30 22:13:36.153817", "step": 538, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:36.194299", "step": 538, "epoch": 1 }, { "type": "loss", "content": 0.010118505917489529, "timestamp": "2025-09-30 22:13:36.207615", "step": 539, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:36.243480", "step": 539, "epoch": 1 }, { "type": "loss", "content": 0.03139385208487511, "timestamp": "2025-09-30 22:13:36.272215", "step": 540, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:36.324438", "step": 540, "epoch": 1 }, { "type": "loss", "content": 0.0242606271058321, "timestamp": "2025-09-30 22:13:36.337548", "step": 541, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:36.374959", "step": 541, "epoch": 1 }, { "type": "loss", "content": 0.015055907890200615, "timestamp": "2025-09-30 22:13:36.388683", "step": 542, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:36.434284", "step": 542, "epoch": 1 }, { "type": "loss", "content": 0.015385239385068417, "timestamp": "2025-09-30 22:13:36.447600", "step": 543, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:36.485204", "step": 543, "epoch": 1 }, { "type": "loss", "content": 0.015145394951105118, "timestamp": "2025-09-30 22:13:36.519976", "step": 544, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:36.556807", "step": 544, "epoch": 1 }, { "type": "loss", "content": 0.026151733472943306, "timestamp": "2025-09-30 22:13:36.566704", "step": 545, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:36.608606", "step": 545, "epoch": 1 }, { "type": "loss", "content": 0.02166786976158619, "timestamp": "2025-09-30 22:13:36.619705", "step": 546, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:36.666620", "step": 546, "epoch": 1 }, { "type": "loss", "content": 0.015373307280242443, "timestamp": "2025-09-30 22:13:36.679197", "step": 547, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:36.715514", "step": 547, "epoch": 1 }, { "type": "loss", "content": 0.018361790105700493, "timestamp": "2025-09-30 22:13:36.757031", "step": 548, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:36.795980", "step": 548, "epoch": 1 }, { "type": "loss", "content": 0.010503076948225498, "timestamp": "2025-09-30 22:13:36.804013", "step": 549, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:36.850648", "step": 549, "epoch": 1 }, { "type": "loss", "content": 0.021157601848244667, "timestamp": "2025-09-30 22:13:36.857881", "step": 550, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:36.896797", "step": 550, "epoch": 1 }, { "type": "loss", "content": 0.026187656447291374, "timestamp": "2025-09-30 22:13:36.903817", "step": 551, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:36.951945", "step": 551, "epoch": 1 }, { "type": "loss", "content": 0.018559563905000687, "timestamp": "2025-09-30 22:13:36.982928", "step": 552, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:37.027735", "step": 552, "epoch": 1 }, { "type": "loss", "content": 0.014130979776382446, "timestamp": "2025-09-30 22:13:37.033314", "step": 553, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:37.073709", "step": 553, "epoch": 1 }, { "type": "loss", "content": 0.02166288159787655, "timestamp": "2025-09-30 22:13:37.081388", "step": 554, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:13:37.134201", "step": 554, "epoch": 1 }, { "type": "loss", "content": 0.017001010477542877, "timestamp": "2025-09-30 22:13:37.149770", "step": 555, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:37.187826", "step": 555, "epoch": 1 }, { "type": "loss", "content": 0.012803551740944386, "timestamp": "2025-09-30 22:13:37.222427", "step": 556, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:37.256705", "step": 556, "epoch": 1 }, { "type": "loss", "content": 0.016770636662840843, "timestamp": "2025-09-30 22:13:37.262057", "step": 557, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:13:37.304808", "step": 557, "epoch": 1 }, { "type": "loss", "content": 0.03533332794904709, "timestamp": "2025-09-30 22:13:37.310757", "step": 558, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:37.348671", "step": 558, "epoch": 1 }, { "type": "loss", "content": 0.017565395683050156, "timestamp": "2025-09-30 22:13:37.356290", "step": 559, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:37.396057", "step": 559, "epoch": 1 }, { "type": "loss", "content": 0.01131448894739151, "timestamp": "2025-09-30 22:13:37.429428", "step": 560, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:37.463433", "step": 560, "epoch": 1 }, { "type": "loss", "content": 0.015417898073792458, "timestamp": "2025-09-30 22:13:37.470382", "step": 561, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:37.513981", "step": 561, "epoch": 1 }, { "type": "loss", "content": 0.013023782521486282, "timestamp": "2025-09-30 22:13:37.526341", "step": 562, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:37.561114", "step": 562, "epoch": 1 }, { "type": "loss", "content": 0.014379228465259075, "timestamp": "2025-09-30 22:13:37.569222", "step": 563, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:37.604054", "step": 563, "epoch": 1 }, { "type": "loss", "content": 0.020593201741576195, "timestamp": "2025-09-30 22:13:37.632917", "step": 564, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:37.665735", "step": 564, "epoch": 1 }, { "type": "loss", "content": 0.019417183473706245, "timestamp": "2025-09-30 22:13:37.671416", "step": 565, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:37.708814", "step": 565, "epoch": 1 }, { "type": "loss", "content": 0.007432464510202408, "timestamp": "2025-09-30 22:13:37.719190", "step": 566, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:37.758493", "step": 566, "epoch": 1 }, { "type": "loss", "content": 0.01681244932115078, "timestamp": "2025-09-30 22:13:37.772197", "step": 567, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-30 22:13:37.825920", "step": 567, "epoch": 1 }, { "type": "loss", "content": 0.04454522579908371, "timestamp": "2025-09-30 22:13:37.855407", "step": 568, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:37.891774", "step": 568, "epoch": 1 }, { "type": "loss", "content": 0.012199216522276402, "timestamp": "2025-09-30 22:13:37.899677", "step": 569, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:37.932621", "step": 569, "epoch": 1 }, { "type": "loss", "content": 0.016835706308484077, "timestamp": "2025-09-30 22:13:37.939892", "step": 570, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:37.976639", "step": 570, "epoch": 1 }, { "type": "loss", "content": 0.017011551186442375, "timestamp": "2025-09-30 22:13:37.990311", "step": 571, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:38.029616", "step": 571, "epoch": 1 }, { "type": "loss", "content": 0.012688270770013332, "timestamp": "2025-09-30 22:13:38.064153", "step": 572, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:38.102966", "step": 572, "epoch": 1 }, { "type": "loss", "content": 0.012485667131841183, "timestamp": "2025-09-30 22:13:38.115602", "step": 573, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:38.152527", "step": 573, "epoch": 1 }, { "type": "loss", "content": 0.03470342978835106, "timestamp": "2025-09-30 22:13:38.160515", "step": 574, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:38.193863", "step": 574, "epoch": 1 }, { "type": "loss", "content": 0.015874244272708893, "timestamp": "2025-09-30 22:13:38.201532", "step": 575, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:13:40.651836", "step": 575, "epoch": 1 }, { "type": "pplx", "content": 5.542289261626032, "timestamp": "2025-09-30 22:13:40.656733", "step": 575, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:40.694481", "step": 575, "epoch": 1 }, { "type": "loss", "content": 0.014259914867579937, "timestamp": "2025-09-30 22:13:40.725352", "step": 576, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:13:40.774137", "step": 576, "epoch": 1 }, { "type": "loss", "content": 0.011850749142467976, "timestamp": "2025-09-30 22:13:40.789978", "step": 577, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:40.835881", "step": 577, "epoch": 1 }, { "type": "loss", "content": 0.014616033993661404, "timestamp": "2025-09-30 22:13:40.847049", "step": 578, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:40.883420", "step": 578, "epoch": 1 }, { "type": "loss", "content": 0.023280199617147446, "timestamp": "2025-09-30 22:13:40.895406", "step": 579, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:40.931029", "step": 579, "epoch": 1 }, { "type": "loss", "content": 0.02082938700914383, "timestamp": "2025-09-30 22:13:40.959902", "step": 580, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 576 ], "flops": 17085996872448 }, "timestamp": "2025-09-30 22:13:41.008909", "step": 580, "epoch": 1 }, { "type": "loss", "content": 0.013227862305939198, "timestamp": "2025-09-30 22:13:41.028178", "step": 581, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:41.065658", "step": 581, "epoch": 1 }, { "type": "loss", "content": 0.009326431900262833, "timestamp": "2025-09-30 22:13:41.079662", "step": 582, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:41.117515", "step": 582, "epoch": 1 }, { "type": "loss", "content": 0.023252299055457115, "timestamp": "2025-09-30 22:13:41.127683", "step": 583, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:41.167813", "step": 583, "epoch": 1 }, { "type": "loss", "content": 0.02949347347021103, "timestamp": "2025-09-30 22:13:41.198901", "step": 584, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:41.234977", "step": 584, "epoch": 1 }, { "type": "loss", "content": 0.015549993142485619, "timestamp": "2025-09-30 22:13:41.245442", "step": 585, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:41.281783", "step": 585, "epoch": 1 }, { "type": "loss", "content": 0.004767420701682568, "timestamp": "2025-09-30 22:13:41.295684", "step": 586, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:41.333337", "step": 586, "epoch": 1 }, { "type": "loss", "content": 0.008911280892789364, "timestamp": "2025-09-30 22:13:41.346963", "step": 587, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:41.380866", "step": 587, "epoch": 1 }, { "type": "loss", "content": 0.01907944679260254, "timestamp": "2025-09-30 22:13:41.408785", "step": 588, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:41.449487", "step": 588, "epoch": 1 }, { "type": "loss", "content": 0.020382946357131004, "timestamp": "2025-09-30 22:13:41.457957", "step": 589, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:41.491429", "step": 589, "epoch": 1 }, { "type": "loss", "content": 0.014416304416954517, "timestamp": "2025-09-30 22:13:41.502478", "step": 590, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:41.536612", "step": 590, "epoch": 1 }, { "type": "loss", "content": 0.023991908878087997, "timestamp": "2025-09-30 22:13:41.547626", "step": 591, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:41.582475", "step": 591, "epoch": 1 }, { "type": "loss", "content": 0.010505203157663345, "timestamp": "2025-09-30 22:13:41.613774", "step": 592, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:13:41.664513", "step": 592, "epoch": 1 }, { "type": "loss", "content": 0.005415383726358414, "timestamp": "2025-09-30 22:13:41.681220", "step": 593, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:41.713701", "step": 593, "epoch": 1 }, { "type": "loss", "content": 0.016899965703487396, "timestamp": "2025-09-30 22:13:41.723283", "step": 594, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:41.764845", "step": 594, "epoch": 1 }, { "type": "loss", "content": 0.015798628330230713, "timestamp": "2025-09-30 22:13:41.774961", "step": 595, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:41.808146", "step": 595, "epoch": 1 }, { "type": "loss", "content": 0.027095021679997444, "timestamp": "2025-09-30 22:13:41.836849", "step": 596, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:41.871885", "step": 596, "epoch": 1 }, { "type": "loss", "content": 0.017347702756524086, "timestamp": "2025-09-30 22:13:41.877564", "step": 597, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:41.914114", "step": 597, "epoch": 1 }, { "type": "loss", "content": 0.0157342329621315, "timestamp": "2025-09-30 22:13:41.925129", "step": 598, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:41.960821", "step": 598, "epoch": 1 }, { "type": "loss", "content": 0.011855707503855228, "timestamp": "2025-09-30 22:13:41.974239", "step": 599, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:42.009285", "step": 599, "epoch": 1 }, { "type": "loss", "content": 0.021833263337612152, "timestamp": "2025-09-30 22:13:42.041404", "step": 600, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:42.075086", "step": 600, "epoch": 1 }, { "type": "loss", "content": 0.022402599453926086, "timestamp": "2025-09-30 22:13:42.084943", "step": 601, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:42.125604", "step": 601, "epoch": 1 }, { "type": "loss", "content": 0.013091500848531723, "timestamp": "2025-09-30 22:13:42.134604", "step": 602, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:42.176735", "step": 602, "epoch": 1 }, { "type": "loss", "content": 0.01641802117228508, "timestamp": "2025-09-30 22:13:42.184313", "step": 603, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:42.218204", "step": 603, "epoch": 1 }, { "type": "loss", "content": 0.017607904970645905, "timestamp": "2025-09-30 22:13:42.250005", "step": 604, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:42.284753", "step": 604, "epoch": 1 }, { "type": "loss", "content": 0.017780223861336708, "timestamp": "2025-09-30 22:13:42.290138", "step": 605, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:42.326782", "step": 605, "epoch": 1 }, { "type": "loss", "content": 0.02521861344575882, "timestamp": "2025-09-30 22:13:42.337254", "step": 606, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:42.379715", "step": 606, "epoch": 1 }, { "type": "loss", "content": 0.015294995158910751, "timestamp": "2025-09-30 22:13:42.393584", "step": 607, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:42.427099", "step": 607, "epoch": 1 }, { "type": "loss", "content": 0.03106614388525486, "timestamp": "2025-09-30 22:13:42.457640", "step": 608, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:42.495788", "step": 608, "epoch": 1 }, { "type": "loss", "content": 0.01641874760389328, "timestamp": "2025-09-30 22:13:42.504501", "step": 609, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:42.544586", "step": 609, "epoch": 1 }, { "type": "loss", "content": 0.024906247854232788, "timestamp": "2025-09-30 22:13:42.554950", "step": 610, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:42.592108", "step": 610, "epoch": 1 }, { "type": "loss", "content": 0.019705880433321, "timestamp": "2025-09-30 22:13:42.603191", "step": 611, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:42.637547", "step": 611, "epoch": 1 }, { "type": "loss", "content": 0.012636306695640087, "timestamp": "2025-09-30 22:13:42.669395", "step": 612, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:42.701195", "step": 612, "epoch": 1 }, { "type": "loss", "content": 0.0174989253282547, "timestamp": "2025-09-30 22:13:42.710964", "step": 613, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:42.749637", "step": 613, "epoch": 1 }, { "type": "loss", "content": 0.01590840145945549, "timestamp": "2025-09-30 22:13:42.760667", "step": 614, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:42.795312", "step": 614, "epoch": 1 }, { "type": "loss", "content": 0.01362869143486023, "timestamp": "2025-09-30 22:13:42.806454", "step": 615, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:42.848751", "step": 615, "epoch": 1 }, { "type": "loss", "content": 0.009033956564962864, "timestamp": "2025-09-30 22:13:42.882156", "step": 616, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:42.918397", "step": 616, "epoch": 1 }, { "type": "loss", "content": 0.021320108324289322, "timestamp": "2025-09-30 22:13:42.924003", "step": 617, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:42.956642", "step": 617, "epoch": 1 }, { "type": "loss", "content": 0.019740305840969086, "timestamp": "2025-09-30 22:13:42.966911", "step": 618, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:43.011329", "step": 618, "epoch": 1 }, { "type": "loss", "content": 0.022858453914523125, "timestamp": "2025-09-30 22:13:43.022590", "step": 619, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:43.054936", "step": 619, "epoch": 1 }, { "type": "loss", "content": 0.018595917150378227, "timestamp": "2025-09-30 22:13:43.083658", "step": 620, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:43.120775", "step": 620, "epoch": 1 }, { "type": "loss", "content": 0.011981564573943615, "timestamp": "2025-09-30 22:13:43.133812", "step": 621, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:13:43.176548", "step": 621, "epoch": 1 }, { "type": "loss", "content": 0.01760626956820488, "timestamp": "2025-09-30 22:13:43.193848", "step": 622, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:43.233906", "step": 622, "epoch": 1 }, { "type": "loss", "content": 0.008837936446070671, "timestamp": "2025-09-30 22:13:43.248018", "step": 623, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:43.284221", "step": 623, "epoch": 1 }, { "type": "loss", "content": 0.015375801362097263, "timestamp": "2025-09-30 22:13:43.318691", "step": 624, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:43.353806", "step": 624, "epoch": 1 }, { "type": "loss", "content": 0.014492068439722061, "timestamp": "2025-09-30 22:13:43.362488", "step": 625, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:43.402925", "step": 625, "epoch": 1 }, { "type": "loss", "content": 0.01539452001452446, "timestamp": "2025-09-30 22:13:43.416672", "step": 626, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:13:43.462546", "step": 626, "epoch": 1 }, { "type": "loss", "content": 0.009434428997337818, "timestamp": "2025-09-30 22:13:43.479609", "step": 627, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:43.518443", "step": 627, "epoch": 1 }, { "type": "loss", "content": 0.01171807199716568, "timestamp": "2025-09-30 22:13:43.553061", "step": 628, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:43.594337", "step": 628, "epoch": 1 }, { "type": "loss", "content": 0.014136611483991146, "timestamp": "2025-09-30 22:13:43.607650", "step": 629, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:13:43.656488", "step": 629, "epoch": 1 }, { "type": "loss", "content": 0.017313985154032707, "timestamp": "2025-09-30 22:13:43.672879", "step": 630, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:13:43.725036", "step": 630, "epoch": 1 }, { "type": "loss", "content": 0.0077360207214951515, "timestamp": "2025-09-30 22:13:43.742352", "step": 631, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:43.783843", "step": 631, "epoch": 1 }, { "type": "loss", "content": 0.012235143221914768, "timestamp": "2025-09-30 22:13:43.818751", "step": 632, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:13:43.864741", "step": 632, "epoch": 1 }, { "type": "loss", "content": 0.0079409284517169, "timestamp": "2025-09-30 22:13:43.880618", "step": 633, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:43.916500", "step": 633, "epoch": 1 }, { "type": "loss", "content": 0.026523813605308533, "timestamp": "2025-09-30 22:13:43.925891", "step": 634, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:43.962688", "step": 634, "epoch": 1 }, { "type": "loss", "content": 0.008819608949124813, "timestamp": "2025-09-30 22:13:43.970518", "step": 635, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:44.006185", "step": 635, "epoch": 1 }, { "type": "loss", "content": 0.011811159551143646, "timestamp": "2025-09-30 22:13:44.039358", "step": 636, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:44.079084", "step": 636, "epoch": 1 }, { "type": "loss", "content": 0.018270481377840042, "timestamp": "2025-09-30 22:13:44.088829", "step": 637, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:44.127641", "step": 637, "epoch": 1 }, { "type": "loss", "content": 0.014364159666001797, "timestamp": "2025-09-30 22:13:44.139966", "step": 638, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:44.184200", "step": 638, "epoch": 1 }, { "type": "loss", "content": 0.013673737645149231, "timestamp": "2025-09-30 22:13:44.196801", "step": 639, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:44.233771", "step": 639, "epoch": 1 }, { "type": "loss", "content": 0.013592913746833801, "timestamp": "2025-09-30 22:13:44.264940", "step": 640, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:44.300405", "step": 640, "epoch": 1 }, { "type": "loss", "content": 0.013886515982449055, "timestamp": "2025-09-30 22:13:44.313121", "step": 641, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:44.354448", "step": 641, "epoch": 1 }, { "type": "loss", "content": 0.017853496596217155, "timestamp": "2025-09-30 22:13:44.367796", "step": 642, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:44.409545", "step": 642, "epoch": 1 }, { "type": "loss", "content": 0.013504012487828732, "timestamp": "2025-09-30 22:13:44.422805", "step": 643, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:44.464796", "step": 643, "epoch": 1 }, { "type": "loss", "content": 0.024404358118772507, "timestamp": "2025-09-30 22:13:44.492542", "step": 644, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:44.534800", "step": 644, "epoch": 1 }, { "type": "loss", "content": 0.012465760111808777, "timestamp": "2025-09-30 22:13:44.543478", "step": 645, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:44.583035", "step": 645, "epoch": 1 }, { "type": "loss", "content": 0.009218334220349789, "timestamp": "2025-09-30 22:13:44.595325", "step": 646, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:44.640836", "step": 646, "epoch": 1 }, { "type": "loss", "content": 0.016696080565452576, "timestamp": "2025-09-30 22:13:44.648495", "step": 647, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:44.689146", "step": 647, "epoch": 1 }, { "type": "loss", "content": 0.015427506528794765, "timestamp": "2025-09-30 22:13:44.722366", "step": 648, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:44.761503", "step": 648, "epoch": 1 }, { "type": "loss", "content": 0.018487118184566498, "timestamp": "2025-09-30 22:13:44.767816", "step": 649, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:44.801172", "step": 649, "epoch": 1 }, { "type": "loss", "content": 0.019440632313489914, "timestamp": "2025-09-30 22:13:44.811652", "step": 650, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:44.842722", "step": 650, "epoch": 1 }, { "type": "loss", "content": 0.02813410945236683, "timestamp": "2025-09-30 22:13:44.849701", "step": 651, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:44.889282", "step": 651, "epoch": 1 }, { "type": "loss", "content": 0.0211077481508255, "timestamp": "2025-09-30 22:13:44.920427", "step": 652, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:44.958158", "step": 652, "epoch": 1 }, { "type": "loss", "content": 0.015093185007572174, "timestamp": "2025-09-30 22:13:44.966021", "step": 653, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:45.003385", "step": 653, "epoch": 1 }, { "type": "loss", "content": 0.020069634541869164, "timestamp": "2025-09-30 22:13:45.015845", "step": 654, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:45.049982", "step": 654, "epoch": 1 }, { "type": "loss", "content": 0.011099635623395443, "timestamp": "2025-09-30 22:13:45.060301", "step": 655, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:45.092528", "step": 655, "epoch": 1 }, { "type": "loss", "content": 0.012230321764945984, "timestamp": "2025-09-30 22:13:45.121315", "step": 656, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:45.154483", "step": 656, "epoch": 1 }, { "type": "loss", "content": 0.019323699176311493, "timestamp": "2025-09-30 22:13:45.159780", "step": 657, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:45.197662", "step": 657, "epoch": 1 }, { "type": "loss", "content": 0.01795719750225544, "timestamp": "2025-09-30 22:13:45.204505", "step": 658, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:45.236063", "step": 658, "epoch": 1 }, { "type": "loss", "content": 0.025249261409044266, "timestamp": "2025-09-30 22:13:45.244008", "step": 659, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:45.276674", "step": 659, "epoch": 1 }, { "type": "loss", "content": 0.017105920240283012, "timestamp": "2025-09-30 22:13:45.305612", "step": 660, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:45.343907", "step": 660, "epoch": 1 }, { "type": "loss", "content": 0.010925698094069958, "timestamp": "2025-09-30 22:13:45.351844", "step": 661, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:45.385988", "step": 661, "epoch": 1 }, { "type": "loss", "content": 0.009514889679849148, "timestamp": "2025-09-30 22:13:45.398487", "step": 662, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:13:45.435188", "step": 662, "epoch": 1 }, { "type": "loss", "content": 0.02289985679090023, "timestamp": "2025-09-30 22:13:45.439812", "step": 663, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:45.473761", "step": 663, "epoch": 1 }, { "type": "loss", "content": 0.03659540042281151, "timestamp": "2025-09-30 22:13:45.501589", "step": 664, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:45.533023", "step": 664, "epoch": 1 }, { "type": "loss", "content": 0.03184283524751663, "timestamp": "2025-09-30 22:13:45.542741", "step": 665, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:45.574688", "step": 665, "epoch": 1 }, { "type": "loss", "content": 0.00956232100725174, "timestamp": "2025-09-30 22:13:45.585185", "step": 666, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:45.622382", "step": 666, "epoch": 1 }, { "type": "loss", "content": 0.011457591317594051, "timestamp": "2025-09-30 22:13:45.635753", "step": 667, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:45.674621", "step": 667, "epoch": 1 }, { "type": "loss", "content": 0.024427276104688644, "timestamp": "2025-09-30 22:13:45.703401", "step": 668, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:45.743551", "step": 668, "epoch": 1 }, { "type": "loss", "content": 0.01626475900411606, "timestamp": "2025-09-30 22:13:45.751576", "step": 669, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:45.800286", "step": 669, "epoch": 1 }, { "type": "loss", "content": 0.010417117737233639, "timestamp": "2025-09-30 22:13:45.816069", "step": 670, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:45.862642", "step": 670, "epoch": 1 }, { "type": "loss", "content": 0.019533686339855194, "timestamp": "2025-09-30 22:13:45.875952", "step": 671, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:45.911280", "step": 671, "epoch": 1 }, { "type": "loss", "content": 0.010500402189791203, "timestamp": "2025-09-30 22:13:45.945792", "step": 672, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:45.979716", "step": 672, "epoch": 1 }, { "type": "loss", "content": 0.013673270121216774, "timestamp": "2025-09-30 22:13:45.992800", "step": 673, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:46.030483", "step": 673, "epoch": 1 }, { "type": "loss", "content": 0.009827575646340847, "timestamp": "2025-09-30 22:13:46.042761", "step": 674, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:46.075750", "step": 674, "epoch": 1 }, { "type": "loss", "content": 0.014296416193246841, "timestamp": "2025-09-30 22:13:46.083817", "step": 675, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:46.121583", "step": 675, "epoch": 1 }, { "type": "loss", "content": 0.01631656102836132, "timestamp": "2025-09-30 22:13:46.156078", "step": 676, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:46.191309", "step": 676, "epoch": 1 }, { "type": "loss", "content": 0.020268293097615242, "timestamp": "2025-09-30 22:13:46.206119", "step": 677, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:46.245281", "step": 677, "epoch": 1 }, { "type": "loss", "content": 0.013104826211929321, "timestamp": "2025-09-30 22:13:46.258911", "step": 678, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:46.290720", "step": 678, "epoch": 1 }, { "type": "loss", "content": 0.014018409885466099, "timestamp": "2025-09-30 22:13:46.297705", "step": 679, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:46.334566", "step": 679, "epoch": 1 }, { "type": "loss", "content": 0.015949858352541924, "timestamp": "2025-09-30 22:13:46.363117", "step": 680, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:13:46.400618", "step": 680, "epoch": 1 }, { "type": "loss", "content": 0.018937628716230392, "timestamp": "2025-09-30 22:13:46.415725", "step": 681, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:46.451324", "step": 681, "epoch": 1 }, { "type": "loss", "content": 0.016113348305225372, "timestamp": "2025-09-30 22:13:46.461558", "step": 682, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:46.497757", "step": 682, "epoch": 1 }, { "type": "loss", "content": 0.01504132803529501, "timestamp": "2025-09-30 22:13:46.505006", "step": 683, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:46.538979", "step": 683, "epoch": 1 }, { "type": "loss", "content": 0.021047841757535934, "timestamp": "2025-09-30 22:13:46.570851", "step": 684, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:46.605659", "step": 684, "epoch": 1 }, { "type": "loss", "content": 0.012815596535801888, "timestamp": "2025-09-30 22:13:46.611235", "step": 685, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:46.645055", "step": 685, "epoch": 1 }, { "type": "loss", "content": 0.02239864692091942, "timestamp": "2025-09-30 22:13:46.657394", "step": 686, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:46.691287", "step": 686, "epoch": 1 }, { "type": "loss", "content": 0.05522322282195091, "timestamp": "2025-09-30 22:13:46.698954", "step": 687, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:46.736092", "step": 687, "epoch": 1 }, { "type": "loss", "content": 0.047288164496421814, "timestamp": "2025-09-30 22:13:46.769520", "step": 688, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:46.806086", "step": 688, "epoch": 1 }, { "type": "loss", "content": 0.019594833254814148, "timestamp": "2025-09-30 22:13:46.819103", "step": 689, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:46.860683", "step": 689, "epoch": 1 }, { "type": "loss", "content": 0.018771354109048843, "timestamp": "2025-09-30 22:13:46.871037", "step": 690, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:13:49.364533", "step": 690, "epoch": 1 }, { "type": "pplx", "content": 5.484566580135139, "timestamp": "2025-09-30 22:13:49.368457", "step": 690, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:49.412394", "step": 690, "epoch": 1 }, { "type": "loss", "content": 0.025742974132299423, "timestamp": "2025-09-30 22:13:49.421766", "step": 691, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:49.459471", "step": 691, "epoch": 1 }, { "type": "loss", "content": 0.013385286554694176, "timestamp": "2025-09-30 22:13:49.491090", "step": 692, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:49.526721", "step": 692, "epoch": 1 }, { "type": "loss", "content": 0.00965004600584507, "timestamp": "2025-09-30 22:13:49.534601", "step": 693, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:49.570431", "step": 693, "epoch": 1 }, { "type": "loss", "content": 0.018168870359659195, "timestamp": "2025-09-30 22:13:49.577668", "step": 694, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:49.613251", "step": 694, "epoch": 1 }, { "type": "loss", "content": 0.013506671413779259, "timestamp": "2025-09-30 22:13:49.624130", "step": 695, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:13:49.670015", "step": 695, "epoch": 1 }, { "type": "loss", "content": 0.009025247767567635, "timestamp": "2025-09-30 22:13:49.707090", "step": 696, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:13:49.745185", "step": 696, "epoch": 1 }, { "type": "loss", "content": 0.041763078421354294, "timestamp": "2025-09-30 22:13:49.756531", "step": 697, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:49.799941", "step": 697, "epoch": 1 }, { "type": "loss", "content": 0.04631451517343521, "timestamp": "2025-09-30 22:13:49.813388", "step": 698, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:49.861839", "step": 698, "epoch": 1 }, { "type": "loss", "content": 0.01658572070300579, "timestamp": "2025-09-30 22:13:49.869480", "step": 699, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:49.906859", "step": 699, "epoch": 1 }, { "type": "loss", "content": 0.009366752579808235, "timestamp": "2025-09-30 22:13:49.938726", "step": 700, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:49.973301", "step": 700, "epoch": 1 }, { "type": "loss", "content": 0.020774291828274727, "timestamp": "2025-09-30 22:13:49.984670", "step": 701, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:50.022185", "step": 701, "epoch": 1 }, { "type": "loss", "content": 0.015729684382677078, "timestamp": "2025-09-30 22:13:50.033250", "step": 702, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:50.067552", "step": 702, "epoch": 1 }, { "type": "loss", "content": 0.011922284960746765, "timestamp": "2025-09-30 22:13:50.079759", "step": 703, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:50.134609", "step": 703, "epoch": 1 }, { "type": "loss", "content": 0.013412105850875378, "timestamp": "2025-09-30 22:13:50.162720", "step": 704, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:50.203110", "step": 704, "epoch": 1 }, { "type": "loss", "content": 0.027656683698296547, "timestamp": "2025-09-30 22:13:50.211735", "step": 705, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:50.246411", "step": 705, "epoch": 1 }, { "type": "loss", "content": 0.017529543489217758, "timestamp": "2025-09-30 22:13:50.253359", "step": 706, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:50.286731", "step": 706, "epoch": 1 }, { "type": "loss", "content": 0.012763168662786484, "timestamp": "2025-09-30 22:13:50.293502", "step": 707, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:50.327321", "step": 707, "epoch": 1 }, { "type": "loss", "content": 0.023422418162226677, "timestamp": "2025-09-30 22:13:50.355691", "step": 708, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:50.390168", "step": 708, "epoch": 1 }, { "type": "loss", "content": 0.020299313589930534, "timestamp": "2025-09-30 22:13:50.395455", "step": 709, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:50.430736", "step": 709, "epoch": 1 }, { "type": "loss", "content": 0.01444624736905098, "timestamp": "2025-09-30 22:13:50.437911", "step": 710, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:50.473148", "step": 710, "epoch": 1 }, { "type": "loss", "content": 0.02190915122628212, "timestamp": "2025-09-30 22:13:50.482260", "step": 711, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:50.517666", "step": 711, "epoch": 1 }, { "type": "loss", "content": 0.01795337162911892, "timestamp": "2025-09-30 22:13:50.549058", "step": 712, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:50.591108", "step": 712, "epoch": 1 }, { "type": "loss", "content": 0.022492265328764915, "timestamp": "2025-09-30 22:13:50.595952", "step": 713, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:50.636614", "step": 713, "epoch": 1 }, { "type": "loss", "content": 0.018829617649316788, "timestamp": "2025-09-30 22:13:50.647014", "step": 714, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:13:50.683152", "step": 714, "epoch": 1 }, { "type": "loss", "content": 0.024367261677980423, "timestamp": "2025-09-30 22:13:50.687425", "step": 715, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:13:50.721032", "step": 715, "epoch": 1 }, { "type": "loss", "content": 0.01960578002035618, "timestamp": "2025-09-30 22:13:50.748399", "step": 716, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:50.783905", "step": 716, "epoch": 1 }, { "type": "loss", "content": 0.021256817504763603, "timestamp": "2025-09-30 22:13:50.788518", "step": 717, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:50.829916", "step": 717, "epoch": 1 }, { "type": "loss", "content": 0.03248652443289757, "timestamp": "2025-09-30 22:13:50.839728", "step": 718, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:50.885312", "step": 718, "epoch": 1 }, { "type": "loss", "content": 0.028153620660305023, "timestamp": "2025-09-30 22:13:50.893291", "step": 719, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:50.931729", "step": 719, "epoch": 1 }, { "type": "loss", "content": 0.006349663250148296, "timestamp": "2025-09-30 22:13:50.959774", "step": 720, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:50.994981", "step": 720, "epoch": 1 }, { "type": "loss", "content": 0.018377935513854027, "timestamp": "2025-09-30 22:13:51.000468", "step": 721, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:51.038006", "step": 721, "epoch": 1 }, { "type": "loss", "content": 0.01730031706392765, "timestamp": "2025-09-30 22:13:51.046409", "step": 722, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:51.079171", "step": 722, "epoch": 1 }, { "type": "loss", "content": 0.022774288430809975, "timestamp": "2025-09-30 22:13:51.086378", "step": 723, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:51.142088", "step": 723, "epoch": 1 }, { "type": "loss", "content": 0.020183557644486427, "timestamp": "2025-09-30 22:13:51.178808", "step": 724, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:51.213986", "step": 724, "epoch": 1 }, { "type": "loss", "content": 0.023975694552063942, "timestamp": "2025-09-30 22:13:51.219234", "step": 725, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:51.262463", "step": 725, "epoch": 1 }, { "type": "loss", "content": 0.011222700588405132, "timestamp": "2025-09-30 22:13:51.269714", "step": 726, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:51.306276", "step": 726, "epoch": 1 }, { "type": "loss", "content": 0.011557246558368206, "timestamp": "2025-09-30 22:13:51.317352", "step": 727, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:51.356064", "step": 727, "epoch": 1 }, { "type": "loss", "content": 0.010076114907860756, "timestamp": "2025-09-30 22:13:51.390759", "step": 728, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:51.421783", "step": 728, "epoch": 1 }, { "type": "loss", "content": 0.018097903579473495, "timestamp": "2025-09-30 22:13:51.430402", "step": 729, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:51.468030", "step": 729, "epoch": 1 }, { "type": "loss", "content": 0.01810041442513466, "timestamp": "2025-09-30 22:13:51.475202", "step": 730, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:51.510176", "step": 730, "epoch": 1 }, { "type": "loss", "content": 0.014912882819771767, "timestamp": "2025-09-30 22:13:51.517885", "step": 731, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:51.554991", "step": 731, "epoch": 1 }, { "type": "loss", "content": 0.030342083424329758, "timestamp": "2025-09-30 22:13:51.586228", "step": 732, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:51.623186", "step": 732, "epoch": 1 }, { "type": "loss", "content": 0.020088672637939453, "timestamp": "2025-09-30 22:13:51.628865", "step": 733, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:51.669345", "step": 733, "epoch": 1 }, { "type": "loss", "content": 0.02861752174794674, "timestamp": "2025-09-30 22:13:51.681846", "step": 734, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:51.717305", "step": 734, "epoch": 1 }, { "type": "loss", "content": 0.018066808581352234, "timestamp": "2025-09-30 22:13:51.728252", "step": 735, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:51.765716", "step": 735, "epoch": 1 }, { "type": "loss", "content": 0.01734330505132675, "timestamp": "2025-09-30 22:13:51.793853", "step": 736, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:13:51.835557", "step": 736, "epoch": 1 }, { "type": "loss", "content": 0.01800304837524891, "timestamp": "2025-09-30 22:13:51.850671", "step": 737, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:51.883073", "step": 737, "epoch": 1 }, { "type": "loss", "content": 0.013623161241412163, "timestamp": "2025-09-30 22:13:51.891018", "step": 738, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:51.924341", "step": 738, "epoch": 1 }, { "type": "loss", "content": 0.013523743487894535, "timestamp": "2025-09-30 22:13:51.934770", "step": 739, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:51.969832", "step": 739, "epoch": 1 }, { "type": "loss", "content": 0.017236221581697464, "timestamp": "2025-09-30 22:13:52.001212", "step": 740, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:52.045376", "step": 740, "epoch": 1 }, { "type": "loss", "content": 0.023654861375689507, "timestamp": "2025-09-30 22:13:52.051027", "step": 741, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:13:52.096205", "step": 741, "epoch": 1 }, { "type": "loss", "content": 0.010722161270678043, "timestamp": "2025-09-30 22:13:52.113567", "step": 742, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:52.155172", "step": 742, "epoch": 1 }, { "type": "loss", "content": 0.013899749144911766, "timestamp": "2025-09-30 22:13:52.167517", "step": 743, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:52.206419", "step": 743, "epoch": 1 }, { "type": "loss", "content": 0.020585909485816956, "timestamp": "2025-09-30 22:13:52.234992", "step": 744, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:52.269850", "step": 744, "epoch": 1 }, { "type": "loss", "content": 0.01175573468208313, "timestamp": "2025-09-30 22:13:52.280511", "step": 745, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:52.321935", "step": 745, "epoch": 1 }, { "type": "loss", "content": 0.023122472688555717, "timestamp": "2025-09-30 22:13:52.329965", "step": 746, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:52.367523", "step": 746, "epoch": 1 }, { "type": "loss", "content": 0.019044872373342514, "timestamp": "2025-09-30 22:13:52.379841", "step": 747, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:52.422914", "step": 747, "epoch": 1 }, { "type": "loss", "content": 0.013782423920929432, "timestamp": "2025-09-30 22:13:52.451727", "step": 748, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:52.489172", "step": 748, "epoch": 1 }, { "type": "loss", "content": 0.020801430568099022, "timestamp": "2025-09-30 22:13:52.497779", "step": 749, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:52.531028", "step": 749, "epoch": 1 }, { "type": "loss", "content": 0.023138711228966713, "timestamp": "2025-09-30 22:13:52.542056", "step": 750, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:52.580836", "step": 750, "epoch": 1 }, { "type": "loss", "content": 0.007247679401189089, "timestamp": "2025-09-30 22:13:52.591421", "step": 751, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:52.628640", "step": 751, "epoch": 1 }, { "type": "loss", "content": 0.011202228255569935, "timestamp": "2025-09-30 22:13:52.663217", "step": 752, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:52.702720", "step": 752, "epoch": 1 }, { "type": "loss", "content": 0.013906356878578663, "timestamp": "2025-09-30 22:13:52.711409", "step": 753, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:52.743736", "step": 753, "epoch": 1 }, { "type": "loss", "content": 0.012691138312220573, "timestamp": "2025-09-30 22:13:52.754963", "step": 754, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:52.795711", "step": 754, "epoch": 1 }, { "type": "loss", "content": 0.016555367037653923, "timestamp": "2025-09-30 22:13:52.806841", "step": 755, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:52.841675", "step": 755, "epoch": 1 }, { "type": "loss", "content": 0.01773860678076744, "timestamp": "2025-09-30 22:13:52.875041", "step": 756, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:52.910277", "step": 756, "epoch": 1 }, { "type": "loss", "content": 0.02634359337389469, "timestamp": "2025-09-30 22:13:52.918145", "step": 757, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:52.959796", "step": 757, "epoch": 1 }, { "type": "loss", "content": 0.016910066828131676, "timestamp": "2025-09-30 22:13:52.970933", "step": 758, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:53.008546", "step": 758, "epoch": 1 }, { "type": "loss", "content": 0.0069466689601540565, "timestamp": "2025-09-30 22:13:53.022322", "step": 759, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:53.062812", "step": 759, "epoch": 1 }, { "type": "loss", "content": 0.020431499928236008, "timestamp": "2025-09-30 22:13:53.097408", "step": 760, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:53.139348", "step": 760, "epoch": 1 }, { "type": "loss", "content": 0.015317758545279503, "timestamp": "2025-09-30 22:13:53.152448", "step": 761, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:53.192466", "step": 761, "epoch": 1 }, { "type": "loss", "content": 0.0235912948846817, "timestamp": "2025-09-30 22:13:53.202840", "step": 762, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:53.241983", "step": 762, "epoch": 1 }, { "type": "loss", "content": 0.009301051497459412, "timestamp": "2025-09-30 22:13:53.255674", "step": 763, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:53.289280", "step": 763, "epoch": 1 }, { "type": "loss", "content": 0.021411392837762833, "timestamp": "2025-09-30 22:13:53.321105", "step": 764, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:53.355219", "step": 764, "epoch": 1 }, { "type": "loss", "content": 0.017297813668847084, "timestamp": "2025-09-30 22:13:53.364951", "step": 765, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 640 ], "flops": 18984411776512 }, "timestamp": "2025-09-30 22:13:53.420524", "step": 765, "epoch": 1 }, { "type": "loss", "content": 0.007814758457243443, "timestamp": "2025-09-30 22:13:53.442153", "step": 766, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:53.492343", "step": 766, "epoch": 1 }, { "type": "loss", "content": 0.00955934077501297, "timestamp": "2025-09-30 22:13:53.502769", "step": 767, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:13:53.541235", "step": 767, "epoch": 1 }, { "type": "loss", "content": 0.013868585228919983, "timestamp": "2025-09-30 22:13:53.567216", "step": 768, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:53.611092", "step": 768, "epoch": 1 }, { "type": "loss", "content": 0.04621420055627823, "timestamp": "2025-09-30 22:13:53.615640", "step": 769, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:53.649584", "step": 769, "epoch": 1 }, { "type": "loss", "content": 0.012514598667621613, "timestamp": "2025-09-30 22:13:53.657151", "step": 770, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:53.694645", "step": 770, "epoch": 1 }, { "type": "loss", "content": 0.02450910024344921, "timestamp": "2025-09-30 22:13:53.701862", "step": 771, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:53.745935", "step": 771, "epoch": 1 }, { "type": "loss", "content": 0.013912992551922798, "timestamp": "2025-09-30 22:13:53.780527", "step": 772, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:53.822118", "step": 772, "epoch": 1 }, { "type": "loss", "content": 0.014330840669572353, "timestamp": "2025-09-30 22:13:53.830134", "step": 773, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:53.873290", "step": 773, "epoch": 1 }, { "type": "loss", "content": 0.009840606711804867, "timestamp": "2025-09-30 22:13:53.886591", "step": 774, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:53.926114", "step": 774, "epoch": 1 }, { "type": "loss", "content": 0.01082277949899435, "timestamp": "2025-09-30 22:13:53.939447", "step": 775, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:53.978363", "step": 775, "epoch": 1 }, { "type": "loss", "content": 0.03701702505350113, "timestamp": "2025-09-30 22:13:54.012899", "step": 776, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:54.055578", "step": 776, "epoch": 1 }, { "type": "loss", "content": 0.017155220732092857, "timestamp": "2025-09-30 22:13:54.068873", "step": 777, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:54.105009", "step": 777, "epoch": 1 }, { "type": "loss", "content": 0.017413174733519554, "timestamp": "2025-09-30 22:13:54.113036", "step": 778, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:54.159895", "step": 778, "epoch": 1 }, { "type": "loss", "content": 0.02033637836575508, "timestamp": "2025-09-30 22:13:54.167541", "step": 779, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:54.203556", "step": 779, "epoch": 1 }, { "type": "loss", "content": 0.021800467744469643, "timestamp": "2025-09-30 22:13:54.236903", "step": 780, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:13:54.288664", "step": 780, "epoch": 1 }, { "type": "loss", "content": 0.010870695114135742, "timestamp": "2025-09-30 22:13:54.304095", "step": 781, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:54.339513", "step": 781, "epoch": 1 }, { "type": "loss", "content": 0.02554204687476158, "timestamp": "2025-09-30 22:13:54.347298", "step": 782, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:13:54.388721", "step": 782, "epoch": 1 }, { "type": "loss", "content": 0.016182314604520798, "timestamp": "2025-09-30 22:13:54.402104", "step": 783, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:54.462080", "step": 783, "epoch": 1 }, { "type": "loss", "content": 0.016113195568323135, "timestamp": "2025-09-30 22:13:54.495450", "step": 784, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:54.532332", "step": 784, "epoch": 1 }, { "type": "loss", "content": 0.010909128934144974, "timestamp": "2025-09-30 22:13:54.540352", "step": 785, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:54.584057", "step": 785, "epoch": 1 }, { "type": "loss", "content": 0.021256666630506516, "timestamp": "2025-09-30 22:13:54.591379", "step": 786, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 544 ], "flops": 16136789420416 }, "timestamp": "2025-09-30 22:13:54.647614", "step": 786, "epoch": 1 }, { "type": "loss", "content": 0.006734177935868502, "timestamp": "2025-09-30 22:13:54.666657", "step": 787, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:54.699301", "step": 787, "epoch": 1 }, { "type": "loss", "content": 0.031248245388269424, "timestamp": "2025-09-30 22:13:54.727530", "step": 788, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:54.769108", "step": 788, "epoch": 1 }, { "type": "loss", "content": 0.012545477598905563, "timestamp": "2025-09-30 22:13:54.774636", "step": 789, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:54.815393", "step": 789, "epoch": 1 }, { "type": "loss", "content": 0.02404787205159664, "timestamp": "2025-09-30 22:13:54.829196", "step": 790, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:54.883566", "step": 790, "epoch": 1 }, { "type": "loss", "content": 0.021722253412008286, "timestamp": "2025-09-30 22:13:54.891348", "step": 791, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:54.931166", "step": 791, "epoch": 1 }, { "type": "loss", "content": 0.02735544927418232, "timestamp": "2025-09-30 22:13:54.960073", "step": 792, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:54.996228", "step": 792, "epoch": 1 }, { "type": "loss", "content": 0.015375000424683094, "timestamp": "2025-09-30 22:13:55.006871", "step": 793, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:55.043368", "step": 793, "epoch": 1 }, { "type": "loss", "content": 0.015890859067440033, "timestamp": "2025-09-30 22:13:55.053473", "step": 794, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:55.085328", "step": 794, "epoch": 1 }, { "type": "loss", "content": 0.029422825202345848, "timestamp": "2025-09-30 22:13:55.093192", "step": 795, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:55.136339", "step": 795, "epoch": 1 }, { "type": "loss", "content": 0.01636488176882267, "timestamp": "2025-09-30 22:13:55.167474", "step": 796, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:55.206512", "step": 796, "epoch": 1 }, { "type": "loss", "content": 0.019432881847023964, "timestamp": "2025-09-30 22:13:55.217005", "step": 797, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:13:55.251024", "step": 797, "epoch": 1 }, { "type": "loss", "content": 0.036731038242578506, "timestamp": "2025-09-30 22:13:55.257974", "step": 798, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:55.292715", "step": 798, "epoch": 1 }, { "type": "loss", "content": 0.025194579735398293, "timestamp": "2025-09-30 22:13:55.300541", "step": 799, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:55.336626", "step": 799, "epoch": 1 }, { "type": "loss", "content": 0.019482208415865898, "timestamp": "2025-09-30 22:13:55.364778", "step": 800, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:55.401924", "step": 800, "epoch": 1 }, { "type": "loss", "content": 0.026140403002500534, "timestamp": "2025-09-30 22:13:55.410604", "step": 801, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:55.443029", "step": 801, "epoch": 1 }, { "type": "loss", "content": 0.024593614041805267, "timestamp": "2025-09-30 22:13:55.451048", "step": 802, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:55.484657", "step": 802, "epoch": 1 }, { "type": "loss", "content": 0.03376416862010956, "timestamp": "2025-09-30 22:13:55.496961", "step": 803, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:55.532399", "step": 803, "epoch": 1 }, { "type": "loss", "content": 0.01586739346385002, "timestamp": "2025-09-30 22:13:55.563784", "step": 804, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:55.597543", "step": 804, "epoch": 1 }, { "type": "loss", "content": 0.019470948725938797, "timestamp": "2025-09-30 22:13:55.605561", "step": 805, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:13:58.084212", "step": 805, "epoch": 1 }, { "type": "pplx", "content": 5.446366031615489, "timestamp": "2025-09-30 22:13:58.086789", "step": 805, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:58.118236", "step": 805, "epoch": 1 }, { "type": "loss", "content": 0.013950072228908539, "timestamp": "2025-09-30 22:13:58.125408", "step": 806, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:13:58.164289", "step": 806, "epoch": 1 }, { "type": "loss", "content": 0.012152438051998615, "timestamp": "2025-09-30 22:13:58.179806", "step": 807, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:58.212773", "step": 807, "epoch": 1 }, { "type": "loss", "content": 0.026414604857563972, "timestamp": "2025-09-30 22:13:58.240797", "step": 808, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:58.275425", "step": 808, "epoch": 1 }, { "type": "loss", "content": 0.012953411787748337, "timestamp": "2025-09-30 22:13:58.288691", "step": 809, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:13:58.335386", "step": 809, "epoch": 1 }, { "type": "loss", "content": 0.005233216565102339, "timestamp": "2025-09-30 22:13:58.352633", "step": 810, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:58.385541", "step": 810, "epoch": 1 }, { "type": "loss", "content": 0.017348093912005424, "timestamp": "2025-09-30 22:13:58.393372", "step": 811, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:58.424947", "step": 811, "epoch": 1 }, { "type": "loss", "content": 0.008451183326542377, "timestamp": "2025-09-30 22:13:58.456178", "step": 812, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:13:58.490172", "step": 812, "epoch": 1 }, { "type": "loss", "content": 0.009706801734864712, "timestamp": "2025-09-30 22:13:58.503224", "step": 813, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:58.543435", "step": 813, "epoch": 1 }, { "type": "loss", "content": 0.016594383865594864, "timestamp": "2025-09-30 22:13:58.554437", "step": 814, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:58.589296", "step": 814, "epoch": 1 }, { "type": "loss", "content": 0.02336704172194004, "timestamp": "2025-09-30 22:13:58.597199", "step": 815, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:58.636105", "step": 815, "epoch": 1 }, { "type": "loss", "content": 0.013078423216938972, "timestamp": "2025-09-30 22:13:58.670959", "step": 816, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:58.707170", "step": 816, "epoch": 1 }, { "type": "loss", "content": 0.009744293987751007, "timestamp": "2025-09-30 22:13:58.717792", "step": 817, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:58.751176", "step": 817, "epoch": 1 }, { "type": "loss", "content": 0.01637791283428669, "timestamp": "2025-09-30 22:13:58.762227", "step": 818, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:13:58.802043", "step": 818, "epoch": 1 }, { "type": "loss", "content": 0.012836102396249771, "timestamp": "2025-09-30 22:13:58.816018", "step": 819, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:58.854037", "step": 819, "epoch": 1 }, { "type": "loss", "content": 0.012655460275709629, "timestamp": "2025-09-30 22:13:58.882444", "step": 820, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:58.922930", "step": 820, "epoch": 1 }, { "type": "loss", "content": 0.015463345684111118, "timestamp": "2025-09-30 22:13:58.933348", "step": 821, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:58.971831", "step": 821, "epoch": 1 }, { "type": "loss", "content": 0.023430321365594864, "timestamp": "2025-09-30 22:13:58.979432", "step": 822, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:59.017428", "step": 822, "epoch": 1 }, { "type": "loss", "content": 0.021295029670000076, "timestamp": "2025-09-30 22:13:59.024919", "step": 823, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:59.058596", "step": 823, "epoch": 1 }, { "type": "loss", "content": 0.010640447959303856, "timestamp": "2025-09-30 22:13:59.089903", "step": 824, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:59.125512", "step": 824, "epoch": 1 }, { "type": "loss", "content": 0.016809722408652306, "timestamp": "2025-09-30 22:13:59.131025", "step": 825, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:59.165649", "step": 825, "epoch": 1 }, { "type": "loss", "content": 0.011587375774979591, "timestamp": "2025-09-30 22:13:59.178196", "step": 826, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:59.216681", "step": 826, "epoch": 1 }, { "type": "loss", "content": 0.015174712985754013, "timestamp": "2025-09-30 22:13:59.229261", "step": 827, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:59.264302", "step": 827, "epoch": 1 }, { "type": "loss", "content": 0.009945693425834179, "timestamp": "2025-09-30 22:13:59.296429", "step": 828, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:59.329270", "step": 828, "epoch": 1 }, { "type": "loss", "content": 0.02524462342262268, "timestamp": "2025-09-30 22:13:59.337420", "step": 829, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:59.375305", "step": 829, "epoch": 1 }, { "type": "loss", "content": 0.017129966989159584, "timestamp": "2025-09-30 22:13:59.386425", "step": 830, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:13:59.425848", "step": 830, "epoch": 1 }, { "type": "loss", "content": 0.012208385393023491, "timestamp": "2025-09-30 22:13:59.439564", "step": 831, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:59.486831", "step": 831, "epoch": 1 }, { "type": "loss", "content": 0.014922388829290867, "timestamp": "2025-09-30 22:13:59.517928", "step": 832, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:59.559079", "step": 832, "epoch": 1 }, { "type": "loss", "content": 0.01866101287305355, "timestamp": "2025-09-30 22:13:59.564409", "step": 833, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:59.600361", "step": 833, "epoch": 1 }, { "type": "loss", "content": 0.009599830955266953, "timestamp": "2025-09-30 22:13:59.611407", "step": 834, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:13:59.644298", "step": 834, "epoch": 1 }, { "type": "loss", "content": 0.03083634376525879, "timestamp": "2025-09-30 22:13:59.654533", "step": 835, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:59.693662", "step": 835, "epoch": 1 }, { "type": "loss", "content": 0.010529820807278156, "timestamp": "2025-09-30 22:13:59.726829", "step": 836, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:13:59.763676", "step": 836, "epoch": 1 }, { "type": "loss", "content": 0.015880852937698364, "timestamp": "2025-09-30 22:13:59.772306", "step": 837, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:13:59.808123", "step": 837, "epoch": 1 }, { "type": "loss", "content": 0.02119717001914978, "timestamp": "2025-09-30 22:13:59.816002", "step": 838, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:13:59.847169", "step": 838, "epoch": 1 }, { "type": "loss", "content": 0.01195271871984005, "timestamp": "2025-09-30 22:13:59.854327", "step": 839, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:13:59.887415", "step": 839, "epoch": 1 }, { "type": "loss", "content": 0.013231132179498672, "timestamp": "2025-09-30 22:13:59.915775", "step": 840, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:13:59.953962", "step": 840, "epoch": 1 }, { "type": "loss", "content": 0.0175318643450737, "timestamp": "2025-09-30 22:13:59.963724", "step": 841, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:13:59.998336", "step": 841, "epoch": 1 }, { "type": "loss", "content": 0.0097137950360775, "timestamp": "2025-09-30 22:14:00.010847", "step": 842, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:00.052076", "step": 842, "epoch": 1 }, { "type": "loss", "content": 0.011096476577222347, "timestamp": "2025-09-30 22:14:00.065803", "step": 843, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:00.101517", "step": 843, "epoch": 1 }, { "type": "loss", "content": 0.02286028116941452, "timestamp": "2025-09-30 22:14:00.130130", "step": 844, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:00.166872", "step": 844, "epoch": 1 }, { "type": "loss", "content": 0.00962063018232584, "timestamp": "2025-09-30 22:14:00.172624", "step": 845, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:00.204656", "step": 845, "epoch": 1 }, { "type": "loss", "content": 0.022263722494244576, "timestamp": "2025-09-30 22:14:00.214984", "step": 846, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:00.247781", "step": 846, "epoch": 1 }, { "type": "loss", "content": 0.00966158602386713, "timestamp": "2025-09-30 22:14:00.258658", "step": 847, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:00.296570", "step": 847, "epoch": 1 }, { "type": "loss", "content": 0.024794315919280052, "timestamp": "2025-09-30 22:14:00.325349", "step": 848, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-30 22:14:00.385396", "step": 848, "epoch": 1 }, { "type": "loss", "content": 0.012166989967226982, "timestamp": "2025-09-30 22:14:00.401736", "step": 849, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:00.439571", "step": 849, "epoch": 1 }, { "type": "loss", "content": 0.008486066944897175, "timestamp": "2025-09-30 22:14:00.452127", "step": 850, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:00.492815", "step": 850, "epoch": 1 }, { "type": "loss", "content": 0.010692514479160309, "timestamp": "2025-09-30 22:14:00.506623", "step": 851, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:00.541829", "step": 851, "epoch": 1 }, { "type": "loss", "content": 0.011581682600080967, "timestamp": "2025-09-30 22:14:00.576075", "step": 852, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:00.625311", "step": 852, "epoch": 1 }, { "type": "loss", "content": 0.016717543825507164, "timestamp": "2025-09-30 22:14:00.635069", "step": 853, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:00.673558", "step": 853, "epoch": 1 }, { "type": "loss", "content": 0.027172185480594635, "timestamp": "2025-09-30 22:14:00.687255", "step": 854, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:00.720660", "step": 854, "epoch": 1 }, { "type": "loss", "content": 0.023855775594711304, "timestamp": "2025-09-30 22:14:00.732711", "step": 855, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:00.764832", "step": 855, "epoch": 1 }, { "type": "loss", "content": 0.018536483868956566, "timestamp": "2025-09-30 22:14:00.797864", "step": 856, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:00.835583", "step": 856, "epoch": 1 }, { "type": "loss", "content": 0.030458739027380943, "timestamp": "2025-09-30 22:14:00.845442", "step": 857, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:00.881114", "step": 857, "epoch": 1 }, { "type": "loss", "content": 0.014870762825012207, "timestamp": "2025-09-30 22:14:00.893227", "step": 858, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:00.930939", "step": 858, "epoch": 1 }, { "type": "loss", "content": 0.017727408558130264, "timestamp": "2025-09-30 22:14:00.944878", "step": 859, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:00.979186", "step": 859, "epoch": 1 }, { "type": "loss", "content": 0.012422531843185425, "timestamp": "2025-09-30 22:14:01.007999", "step": 860, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:01.045455", "step": 860, "epoch": 1 }, { "type": "loss", "content": 0.008018754422664642, "timestamp": "2025-09-30 22:14:01.058828", "step": 861, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:01.095177", "step": 861, "epoch": 1 }, { "type": "loss", "content": 0.01912866346538067, "timestamp": "2025-09-30 22:14:01.107642", "step": 862, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:01.152731", "step": 862, "epoch": 1 }, { "type": "loss", "content": 0.01784403808414936, "timestamp": "2025-09-30 22:14:01.160292", "step": 863, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:01.196779", "step": 863, "epoch": 1 }, { "type": "loss", "content": 0.022543074563145638, "timestamp": "2025-09-30 22:14:01.225476", "step": 864, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:01.258613", "step": 864, "epoch": 1 }, { "type": "loss", "content": 0.014795443043112755, "timestamp": "2025-09-30 22:14:01.269110", "step": 865, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:01.307700", "step": 865, "epoch": 1 }, { "type": "loss", "content": 0.01816575601696968, "timestamp": "2025-09-30 22:14:01.321515", "step": 866, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:01.357297", "step": 866, "epoch": 1 }, { "type": "loss", "content": 0.015042226761579514, "timestamp": "2025-09-30 22:14:01.369627", "step": 867, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:01.402584", "step": 867, "epoch": 1 }, { "type": "loss", "content": 0.010017563588917255, "timestamp": "2025-09-30 22:14:01.435797", "step": 868, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:01.476134", "step": 868, "epoch": 1 }, { "type": "loss", "content": 0.022046193480491638, "timestamp": "2025-09-30 22:14:01.486044", "step": 869, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:01.519190", "step": 869, "epoch": 1 }, { "type": "loss", "content": 0.016485564410686493, "timestamp": "2025-09-30 22:14:01.531321", "step": 870, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:01.573302", "step": 870, "epoch": 1 }, { "type": "loss", "content": 0.014110715128481388, "timestamp": "2025-09-30 22:14:01.584371", "step": 871, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:01.618712", "step": 871, "epoch": 1 }, { "type": "loss", "content": 0.024449339136481285, "timestamp": "2025-09-30 22:14:01.647066", "step": 872, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:01.692885", "step": 872, "epoch": 1 }, { "type": "loss", "content": 0.012250715866684914, "timestamp": "2025-09-30 22:14:01.706201", "step": 873, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:01.742703", "step": 873, "epoch": 1 }, { "type": "loss", "content": 0.015656866133213043, "timestamp": "2025-09-30 22:14:01.756425", "step": 874, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:01.794283", "step": 874, "epoch": 1 }, { "type": "loss", "content": 0.017466336488723755, "timestamp": "2025-09-30 22:14:01.806792", "step": 875, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:01.845856", "step": 875, "epoch": 1 }, { "type": "loss", "content": 0.019561603665351868, "timestamp": "2025-09-30 22:14:01.880085", "step": 876, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:01.920849", "step": 876, "epoch": 1 }, { "type": "loss", "content": 0.012877970933914185, "timestamp": "2025-09-30 22:14:01.930750", "step": 877, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:01.963363", "step": 877, "epoch": 1 }, { "type": "loss", "content": 0.018890580162405968, "timestamp": "2025-09-30 22:14:01.973699", "step": 878, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:02.005660", "step": 878, "epoch": 1 }, { "type": "loss", "content": 0.02163480408489704, "timestamp": "2025-09-30 22:14:02.012736", "step": 879, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:02.052514", "step": 879, "epoch": 1 }, { "type": "loss", "content": 0.008167365565896034, "timestamp": "2025-09-30 22:14:02.087367", "step": 880, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:02.120831", "step": 880, "epoch": 1 }, { "type": "loss", "content": 0.024901211261749268, "timestamp": "2025-09-30 22:14:02.130637", "step": 881, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:02.168472", "step": 881, "epoch": 1 }, { "type": "loss", "content": 0.013065425679087639, "timestamp": "2025-09-30 22:14:02.182132", "step": 882, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:02.216822", "step": 882, "epoch": 1 }, { "type": "loss", "content": 0.012894055806100368, "timestamp": "2025-09-30 22:14:02.229099", "step": 883, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:02.268315", "step": 883, "epoch": 1 }, { "type": "loss", "content": 0.015100360848009586, "timestamp": "2025-09-30 22:14:02.300080", "step": 884, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:02.336762", "step": 884, "epoch": 1 }, { "type": "loss", "content": 0.0276936162263155, "timestamp": "2025-09-30 22:14:02.342318", "step": 885, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:02.380093", "step": 885, "epoch": 1 }, { "type": "loss", "content": 0.012446722947061062, "timestamp": "2025-09-30 22:14:02.393757", "step": 886, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:02.427769", "step": 886, "epoch": 1 }, { "type": "loss", "content": 0.013708610087633133, "timestamp": "2025-09-30 22:14:02.439867", "step": 887, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:14:02.482510", "step": 887, "epoch": 1 }, { "type": "loss", "content": 0.011930789798498154, "timestamp": "2025-09-30 22:14:02.519009", "step": 888, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:02.552566", "step": 888, "epoch": 1 }, { "type": "loss", "content": 0.016422098502516747, "timestamp": "2025-09-30 22:14:02.562655", "step": 889, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:02.595648", "step": 889, "epoch": 1 }, { "type": "loss", "content": 0.018035048618912697, "timestamp": "2025-09-30 22:14:02.607803", "step": 890, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:02.648594", "step": 890, "epoch": 1 }, { "type": "loss", "content": 0.01066044345498085, "timestamp": "2025-09-30 22:14:02.653057", "step": 891, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:02.686245", "step": 891, "epoch": 1 }, { "type": "loss", "content": 0.015982037410140038, "timestamp": "2025-09-30 22:14:02.714212", "step": 892, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:02.749087", "step": 892, "epoch": 1 }, { "type": "loss", "content": 0.013905158266425133, "timestamp": "2025-09-30 22:14:02.753517", "step": 893, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:02.789733", "step": 893, "epoch": 1 }, { "type": "loss", "content": 0.011530783027410507, "timestamp": "2025-09-30 22:14:02.797570", "step": 894, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:02.836499", "step": 894, "epoch": 1 }, { "type": "loss", "content": 0.01866287738084793, "timestamp": "2025-09-30 22:14:02.846416", "step": 895, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:02.881824", "step": 895, "epoch": 1 }, { "type": "loss", "content": 0.011501681990921497, "timestamp": "2025-09-30 22:14:02.913512", "step": 896, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:02.957801", "step": 896, "epoch": 1 }, { "type": "loss", "content": 0.014294018037617207, "timestamp": "2025-09-30 22:14:02.966162", "step": 897, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:03.002258", "step": 897, "epoch": 1 }, { "type": "loss", "content": 0.01338213961571455, "timestamp": "2025-09-30 22:14:03.015620", "step": 898, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:03.048946", "step": 898, "epoch": 1 }, { "type": "loss", "content": 0.019916707649827003, "timestamp": "2025-09-30 22:14:03.059806", "step": 899, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:03.092905", "step": 899, "epoch": 1 }, { "type": "loss", "content": 0.019757641479372978, "timestamp": "2025-09-30 22:14:03.120774", "step": 900, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:03.153911", "step": 900, "epoch": 1 }, { "type": "loss", "content": 0.01972472108900547, "timestamp": "2025-09-30 22:14:03.162400", "step": 901, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:03.199982", "step": 901, "epoch": 1 }, { "type": "loss", "content": 0.022137483581900597, "timestamp": "2025-09-30 22:14:03.210684", "step": 902, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:03.250364", "step": 902, "epoch": 1 }, { "type": "loss", "content": 0.017975429072976112, "timestamp": "2025-09-30 22:14:03.260436", "step": 903, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:03.293549", "step": 903, "epoch": 1 }, { "type": "loss", "content": 0.020977221429347992, "timestamp": "2025-09-30 22:14:03.321326", "step": 904, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:03.360691", "step": 904, "epoch": 1 }, { "type": "loss", "content": 0.00997572485357523, "timestamp": "2025-09-30 22:14:03.370403", "step": 905, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:03.405374", "step": 905, "epoch": 1 }, { "type": "loss", "content": 0.015123856253921986, "timestamp": "2025-09-30 22:14:03.409939", "step": 906, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:03.447373", "step": 906, "epoch": 1 }, { "type": "loss", "content": 0.016774147748947144, "timestamp": "2025-09-30 22:14:03.455205", "step": 907, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:03.493286", "step": 907, "epoch": 1 }, { "type": "loss", "content": 0.014639715664088726, "timestamp": "2025-09-30 22:14:03.518567", "step": 908, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:03.552368", "step": 908, "epoch": 1 }, { "type": "loss", "content": 0.014328272081911564, "timestamp": "2025-09-30 22:14:03.557041", "step": 909, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:03.590231", "step": 909, "epoch": 1 }, { "type": "loss", "content": 0.02620774507522583, "timestamp": "2025-09-30 22:14:03.597776", "step": 910, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:03.638426", "step": 910, "epoch": 1 }, { "type": "loss", "content": 0.015904365107417107, "timestamp": "2025-09-30 22:14:03.646266", "step": 911, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:03.680075", "step": 911, "epoch": 1 }, { "type": "loss", "content": 0.01991444267332554, "timestamp": "2025-09-30 22:14:03.711154", "step": 912, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:03.747038", "step": 912, "epoch": 1 }, { "type": "loss", "content": 0.01666913367807865, "timestamp": "2025-09-30 22:14:03.754701", "step": 913, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:03.788682", "step": 913, "epoch": 1 }, { "type": "loss", "content": 0.015176458284258842, "timestamp": "2025-09-30 22:14:03.796404", "step": 914, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:14:03.847683", "step": 914, "epoch": 1 }, { "type": "loss", "content": 0.007855835370719433, "timestamp": "2025-09-30 22:14:03.865326", "step": 915, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:03.902357", "step": 915, "epoch": 1 }, { "type": "loss", "content": 0.01512453518807888, "timestamp": "2025-09-30 22:14:03.935847", "step": 916, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:03.980180", "step": 916, "epoch": 1 }, { "type": "loss", "content": 0.018737010657787323, "timestamp": "2025-09-30 22:14:03.992767", "step": 917, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:04.046413", "step": 917, "epoch": 1 }, { "type": "loss", "content": 0.017014063894748688, "timestamp": "2025-09-30 22:14:04.056561", "step": 918, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:04.089726", "step": 918, "epoch": 1 }, { "type": "loss", "content": 0.012434338219463825, "timestamp": "2025-09-30 22:14:04.100068", "step": 919, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:04.139334", "step": 919, "epoch": 1 }, { "type": "loss", "content": 0.010637388564646244, "timestamp": "2025-09-30 22:14:04.168055", "step": 920, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:14:06.753923", "step": 920, "epoch": 1 }, { "type": "pplx", "content": 5.437166011987229, "timestamp": "2025-09-30 22:14:06.757145", "step": 920, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:06.791183", "step": 920, "epoch": 1 }, { "type": "loss", "content": 0.016253093257546425, "timestamp": "2025-09-30 22:14:06.797640", "step": 921, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:06.832012", "step": 921, "epoch": 1 }, { "type": "loss", "content": 0.008861783891916275, "timestamp": "2025-09-30 22:14:06.844066", "step": 922, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:14:06.889231", "step": 922, "epoch": 1 }, { "type": "loss", "content": 0.006681237835437059, "timestamp": "2025-09-30 22:14:06.906336", "step": 923, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:06.938193", "step": 923, "epoch": 1 }, { "type": "loss", "content": 0.005157914943993092, "timestamp": "2025-09-30 22:14:06.969313", "step": 924, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:07.009562", "step": 924, "epoch": 1 }, { "type": "loss", "content": 0.012271142564713955, "timestamp": "2025-09-30 22:14:07.014756", "step": 925, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:07.048654", "step": 925, "epoch": 1 }, { "type": "loss", "content": 0.010279586538672447, "timestamp": "2025-09-30 22:14:07.055987", "step": 926, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:07.091280", "step": 926, "epoch": 1 }, { "type": "loss", "content": 0.017878876999020576, "timestamp": "2025-09-30 22:14:07.101571", "step": 927, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:07.142443", "step": 927, "epoch": 1 }, { "type": "loss", "content": 0.04153796657919884, "timestamp": "2025-09-30 22:14:07.167889", "step": 928, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:07.203007", "step": 928, "epoch": 1 }, { "type": "loss", "content": 0.019228620454669, "timestamp": "2025-09-30 22:14:07.207926", "step": 929, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:07.252292", "step": 929, "epoch": 1 }, { "type": "loss", "content": 0.014299347065389156, "timestamp": "2025-09-30 22:14:07.266084", "step": 930, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:07.304441", "step": 930, "epoch": 1 }, { "type": "loss", "content": 0.005411448422819376, "timestamp": "2025-09-30 22:14:07.311632", "step": 931, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:07.350062", "step": 931, "epoch": 1 }, { "type": "loss", "content": 0.021051695570349693, "timestamp": "2025-09-30 22:14:07.378792", "step": 932, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:07.412185", "step": 932, "epoch": 1 }, { "type": "loss", "content": 0.007767108269035816, "timestamp": "2025-09-30 22:14:07.422580", "step": 933, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:07.456182", "step": 933, "epoch": 1 }, { "type": "loss", "content": 0.02131766267120838, "timestamp": "2025-09-30 22:14:07.460614", "step": 934, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:07.492316", "step": 934, "epoch": 1 }, { "type": "loss", "content": 0.009594579227268696, "timestamp": "2025-09-30 22:14:07.499404", "step": 935, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:07.535128", "step": 935, "epoch": 1 }, { "type": "loss", "content": 0.010118049569427967, "timestamp": "2025-09-30 22:14:07.569331", "step": 936, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:14:07.610894", "step": 936, "epoch": 1 }, { "type": "loss", "content": 0.008677459321916103, "timestamp": "2025-09-30 22:14:07.626273", "step": 937, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:07.661459", "step": 937, "epoch": 1 }, { "type": "loss", "content": 0.018763471394777298, "timestamp": "2025-09-30 22:14:07.671831", "step": 938, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:07.709458", "step": 938, "epoch": 1 }, { "type": "loss", "content": 0.016597526147961617, "timestamp": "2025-09-30 22:14:07.723117", "step": 939, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:14:07.774652", "step": 939, "epoch": 1 }, { "type": "loss", "content": 0.011049261316657066, "timestamp": "2025-09-30 22:14:07.812623", "step": 940, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:07.847425", "step": 940, "epoch": 1 }, { "type": "loss", "content": 0.02055833861231804, "timestamp": "2025-09-30 22:14:07.857235", "step": 941, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:07.890794", "step": 941, "epoch": 1 }, { "type": "loss", "content": 0.01371944323182106, "timestamp": "2025-09-30 22:14:07.901176", "step": 942, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:07.942349", "step": 942, "epoch": 1 }, { "type": "loss", "content": 0.01326888706535101, "timestamp": "2025-09-30 22:14:07.956080", "step": 943, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:07.991352", "step": 943, "epoch": 1 }, { "type": "loss", "content": 0.027512535452842712, "timestamp": "2025-09-30 22:14:08.022584", "step": 944, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:08.057180", "step": 944, "epoch": 1 }, { "type": "loss", "content": 0.014796360395848751, "timestamp": "2025-09-30 22:14:08.065225", "step": 945, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:14:08.106347", "step": 945, "epoch": 1 }, { "type": "loss", "content": 0.01242104358971119, "timestamp": "2025-09-30 22:14:08.121991", "step": 946, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:08.158128", "step": 946, "epoch": 1 }, { "type": "loss", "content": 0.017054343596100807, "timestamp": "2025-09-30 22:14:08.170708", "step": 947, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:08.211536", "step": 947, "epoch": 1 }, { "type": "loss", "content": 0.016550425440073013, "timestamp": "2025-09-30 22:14:08.246131", "step": 948, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:08.280433", "step": 948, "epoch": 1 }, { "type": "loss", "content": 0.02881843037903309, "timestamp": "2025-09-30 22:14:08.290053", "step": 949, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:08.333504", "step": 949, "epoch": 1 }, { "type": "loss", "content": 0.02783220261335373, "timestamp": "2025-09-30 22:14:08.340996", "step": 950, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:08.377478", "step": 950, "epoch": 1 }, { "type": "loss", "content": 0.013467947021126747, "timestamp": "2025-09-30 22:14:08.385268", "step": 951, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:08.423240", "step": 951, "epoch": 1 }, { "type": "loss", "content": 0.02384248748421669, "timestamp": "2025-09-30 22:14:08.451107", "step": 952, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:08.492528", "step": 952, "epoch": 1 }, { "type": "loss", "content": 0.016103466972708702, "timestamp": "2025-09-30 22:14:08.497755", "step": 953, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:08.537106", "step": 953, "epoch": 1 }, { "type": "loss", "content": 0.017157990485429764, "timestamp": "2025-09-30 22:14:08.547992", "step": 954, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:08.583104", "step": 954, "epoch": 1 }, { "type": "loss", "content": 0.023314381018280983, "timestamp": "2025-09-30 22:14:08.590843", "step": 955, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:08.628707", "step": 955, "epoch": 1 }, { "type": "loss", "content": 0.013151245191693306, "timestamp": "2025-09-30 22:14:08.660630", "step": 956, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:08.692364", "step": 956, "epoch": 1 }, { "type": "loss", "content": 0.010166237130761147, "timestamp": "2025-09-30 22:14:08.702583", "step": 957, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:08.738726", "step": 957, "epoch": 1 }, { "type": "loss", "content": 0.01939929835498333, "timestamp": "2025-09-30 22:14:08.749959", "step": 958, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:08.785551", "step": 958, "epoch": 1 }, { "type": "loss", "content": 0.016085408627986908, "timestamp": "2025-09-30 22:14:08.798115", "step": 959, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:08.833039", "step": 959, "epoch": 1 }, { "type": "loss", "content": 0.020628873258829117, "timestamp": "2025-09-30 22:14:08.861350", "step": 960, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:08.898337", "step": 960, "epoch": 1 }, { "type": "loss", "content": 0.016140276566147804, "timestamp": "2025-09-30 22:14:08.906784", "step": 961, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:08.945578", "step": 961, "epoch": 1 }, { "type": "loss", "content": 0.012015962973237038, "timestamp": "2025-09-30 22:14:08.952890", "step": 962, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:08.992178", "step": 962, "epoch": 1 }, { "type": "loss", "content": 0.02607676386833191, "timestamp": "2025-09-30 22:14:09.003105", "step": 963, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:09.038816", "step": 963, "epoch": 1 }, { "type": "loss", "content": 0.011443495750427246, "timestamp": "2025-09-30 22:14:09.066628", "step": 964, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:09.116595", "step": 964, "epoch": 1 }, { "type": "loss", "content": 0.015275280922651291, "timestamp": "2025-09-30 22:14:09.129973", "step": 965, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:09.163837", "step": 965, "epoch": 1 }, { "type": "loss", "content": 0.01944708079099655, "timestamp": "2025-09-30 22:14:09.171904", "step": 966, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:09.210415", "step": 966, "epoch": 1 }, { "type": "loss", "content": 0.021834930405020714, "timestamp": "2025-09-30 22:14:09.217237", "step": 967, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:09.262204", "step": 967, "epoch": 1 }, { "type": "loss", "content": 0.010521315038204193, "timestamp": "2025-09-30 22:14:09.296805", "step": 968, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:09.332128", "step": 968, "epoch": 1 }, { "type": "loss", "content": 0.015339354053139687, "timestamp": "2025-09-30 22:14:09.344689", "step": 969, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:09.391933", "step": 969, "epoch": 1 }, { "type": "loss", "content": 0.0168760959059, "timestamp": "2025-09-30 22:14:09.404235", "step": 970, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:09.437467", "step": 970, "epoch": 1 }, { "type": "loss", "content": 0.026797406375408173, "timestamp": "2025-09-30 22:14:09.449979", "step": 971, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:09.488520", "step": 971, "epoch": 1 }, { "type": "loss", "content": 0.016474634408950806, "timestamp": "2025-09-30 22:14:09.523156", "step": 972, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-30 22:14:09.569953", "step": 972, "epoch": 1 }, { "type": "loss", "content": 0.00674683041870594, "timestamp": "2025-09-30 22:14:09.589236", "step": 973, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:14:09.631120", "step": 973, "epoch": 1 }, { "type": "loss", "content": 0.013944888487458229, "timestamp": "2025-09-30 22:14:09.646707", "step": 974, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:09.681364", "step": 974, "epoch": 1 }, { "type": "loss", "content": 0.01417811494320631, "timestamp": "2025-09-30 22:14:09.692359", "step": 975, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:09.733428", "step": 975, "epoch": 1 }, { "type": "loss", "content": 0.019666852429509163, "timestamp": "2025-09-30 22:14:09.767942", "step": 976, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:09.800486", "step": 976, "epoch": 1 }, { "type": "loss", "content": 0.013981659896671772, "timestamp": "2025-09-30 22:14:09.806180", "step": 977, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:09.848161", "step": 977, "epoch": 1 }, { "type": "loss", "content": 0.01465094555169344, "timestamp": "2025-09-30 22:14:09.860685", "step": 978, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:14:09.902854", "step": 978, "epoch": 1 }, { "type": "loss", "content": 0.009940768592059612, "timestamp": "2025-09-30 22:14:09.918442", "step": 979, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:09.967549", "step": 979, "epoch": 1 }, { "type": "loss", "content": 0.006746287923306227, "timestamp": "2025-09-30 22:14:10.002313", "step": 980, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:10.040655", "step": 980, "epoch": 1 }, { "type": "loss", "content": 0.03245190531015396, "timestamp": "2025-09-30 22:14:10.049316", "step": 981, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:10.084868", "step": 981, "epoch": 1 }, { "type": "loss", "content": 0.015351356938481331, "timestamp": "2025-09-30 22:14:10.098249", "step": 982, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:10.131692", "step": 982, "epoch": 1 }, { "type": "loss", "content": 0.03203147277235985, "timestamp": "2025-09-30 22:14:10.138649", "step": 983, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:10.172607", "step": 983, "epoch": 1 }, { "type": "loss", "content": 0.012618161737918854, "timestamp": "2025-09-30 22:14:10.203729", "step": 984, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:10.245929", "step": 984, "epoch": 1 }, { "type": "loss", "content": 0.008273485116660595, "timestamp": "2025-09-30 22:14:10.256392", "step": 985, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:10.292334", "step": 985, "epoch": 1 }, { "type": "loss", "content": 0.012804933823645115, "timestamp": "2025-09-30 22:14:10.299541", "step": 986, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:10.332614", "step": 986, "epoch": 1 }, { "type": "loss", "content": 0.025396456941962242, "timestamp": "2025-09-30 22:14:10.343503", "step": 987, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 544 ], "flops": 16136789420416 }, "timestamp": "2025-09-30 22:14:10.395794", "step": 987, "epoch": 1 }, { "type": "loss", "content": 0.019918175414204597, "timestamp": "2025-09-30 22:14:10.435706", "step": 988, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:10.469009", "step": 988, "epoch": 1 }, { "type": "loss", "content": 0.017784535884857178, "timestamp": "2025-09-30 22:14:10.477776", "step": 989, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:10.515598", "step": 989, "epoch": 1 }, { "type": "loss", "content": 0.010058222338557243, "timestamp": "2025-09-30 22:14:10.522721", "step": 990, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:14:10.571820", "step": 990, "epoch": 1 }, { "type": "loss", "content": 0.00819582398980856, "timestamp": "2025-09-30 22:14:10.588129", "step": 991, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:10.624132", "step": 991, "epoch": 1 }, { "type": "loss", "content": 0.01214824989438057, "timestamp": "2025-09-30 22:14:10.650659", "step": 992, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:10.686899", "step": 992, "epoch": 1 }, { "type": "loss", "content": 0.010886823758482933, "timestamp": "2025-09-30 22:14:10.696450", "step": 993, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:10.729350", "step": 993, "epoch": 1 }, { "type": "loss", "content": 0.013221736997365952, "timestamp": "2025-09-30 22:14:10.737085", "step": 994, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:10.772734", "step": 994, "epoch": 1 }, { "type": "loss", "content": 0.015556025318801403, "timestamp": "2025-09-30 22:14:10.780494", "step": 995, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:10.831426", "step": 995, "epoch": 1 }, { "type": "loss", "content": 0.014741851948201656, "timestamp": "2025-09-30 22:14:10.865611", "step": 996, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:10.897764", "step": 996, "epoch": 1 }, { "type": "loss", "content": 0.014557472430169582, "timestamp": "2025-09-30 22:14:10.903148", "step": 997, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:10.936950", "step": 997, "epoch": 1 }, { "type": "loss", "content": 0.011851382441818714, "timestamp": "2025-09-30 22:14:10.944239", "step": 998, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:10.991666", "step": 998, "epoch": 1 }, { "type": "loss", "content": 0.01922006718814373, "timestamp": "2025-09-30 22:14:10.999261", "step": 999, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:11.033925", "step": 999, "epoch": 1 }, { "type": "loss", "content": 0.016480710357427597, "timestamp": "2025-09-30 22:14:11.065573", "step": 1000, "epoch": 1 }, { "type": "info", "content": "Checkpoint saved at step 1000", "timestamp": "2025-09-30 22:14:16.235356", "step": 1000, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:16.274591", "step": 1000, "epoch": 1 }, { "type": "loss", "content": 0.027280423790216446, "timestamp": "2025-09-30 22:14:16.278282", "step": 1001, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:16.318809", "step": 1001, "epoch": 1 }, { "type": "loss", "content": 0.023930471390485764, "timestamp": "2025-09-30 22:14:16.329324", "step": 1002, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:14:16.379352", "step": 1002, "epoch": 1 }, { "type": "loss", "content": 0.02310759946703911, "timestamp": "2025-09-30 22:14:16.383280", "step": 1003, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:16.426238", "step": 1003, "epoch": 1 }, { "type": "loss", "content": 0.019107503816485405, "timestamp": "2025-09-30 22:14:16.463706", "step": 1004, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:16.507751", "step": 1004, "epoch": 1 }, { "type": "loss", "content": 0.01613452471792698, "timestamp": "2025-09-30 22:14:16.512275", "step": 1005, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:16.552761", "step": 1005, "epoch": 1 }, { "type": "loss", "content": 0.025932583957910538, "timestamp": "2025-09-30 22:14:16.557234", "step": 1006, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:16.597795", "step": 1006, "epoch": 1 }, { "type": "loss", "content": 0.017133804038167, "timestamp": "2025-09-30 22:14:16.607631", "step": 1007, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:16.659475", "step": 1007, "epoch": 1 }, { "type": "loss", "content": 0.006820287089794874, "timestamp": "2025-09-30 22:14:16.694159", "step": 1008, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:16.729978", "step": 1008, "epoch": 1 }, { "type": "loss", "content": 0.00994724128395319, "timestamp": "2025-09-30 22:14:16.743007", "step": 1009, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:16.781840", "step": 1009, "epoch": 1 }, { "type": "loss", "content": 0.015249098651111126, "timestamp": "2025-09-30 22:14:16.788935", "step": 1010, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:16.823322", "step": 1010, "epoch": 1 }, { "type": "loss", "content": 0.02501874603331089, "timestamp": "2025-09-30 22:14:16.830721", "step": 1011, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:16.870840", "step": 1011, "epoch": 1 }, { "type": "loss", "content": 0.017700329422950745, "timestamp": "2025-09-30 22:14:16.902658", "step": 1012, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:16.939259", "step": 1012, "epoch": 1 }, { "type": "loss", "content": 0.0090402290225029, "timestamp": "2025-09-30 22:14:16.947896", "step": 1013, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:17.001464", "step": 1013, "epoch": 1 }, { "type": "loss", "content": 0.018181640654802322, "timestamp": "2025-09-30 22:14:17.012675", "step": 1014, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:17.053290", "step": 1014, "epoch": 1 }, { "type": "loss", "content": 0.017796620726585388, "timestamp": "2025-09-30 22:14:17.066946", "step": 1015, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:17.105038", "step": 1015, "epoch": 1 }, { "type": "loss", "content": 0.013271898031234741, "timestamp": "2025-09-30 22:14:17.137089", "step": 1016, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:17.178282", "step": 1016, "epoch": 1 }, { "type": "loss", "content": 0.038167428225278854, "timestamp": "2025-09-30 22:14:17.183741", "step": 1017, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:17.220171", "step": 1017, "epoch": 1 }, { "type": "loss", "content": 0.019977156072854996, "timestamp": "2025-09-30 22:14:17.227610", "step": 1018, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:17.267100", "step": 1018, "epoch": 1 }, { "type": "loss", "content": 0.011846660636365414, "timestamp": "2025-09-30 22:14:17.281116", "step": 1019, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 592 ], "flops": 17560600598464 }, "timestamp": "2025-09-30 22:14:17.338390", "step": 1019, "epoch": 1 }, { "type": "loss", "content": 0.007278237491846085, "timestamp": "2025-09-30 22:14:17.380347", "step": 1020, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:17.419895", "step": 1020, "epoch": 1 }, { "type": "loss", "content": 0.025377754122018814, "timestamp": "2025-09-30 22:14:17.425261", "step": 1021, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:17.461539", "step": 1021, "epoch": 1 }, { "type": "loss", "content": 0.021037183701992035, "timestamp": "2025-09-30 22:14:17.475197", "step": 1022, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:17.511585", "step": 1022, "epoch": 1 }, { "type": "loss", "content": 0.015959495678544044, "timestamp": "2025-09-30 22:14:17.523649", "step": 1023, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:17.564747", "step": 1023, "epoch": 1 }, { "type": "loss", "content": 0.019597096368670464, "timestamp": "2025-09-30 22:14:17.595875", "step": 1024, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:17.630134", "step": 1024, "epoch": 1 }, { "type": "loss", "content": 0.01724843867123127, "timestamp": "2025-09-30 22:14:17.635208", "step": 1025, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:17.672197", "step": 1025, "epoch": 1 }, { "type": "loss", "content": 0.012914427556097507, "timestamp": "2025-09-30 22:14:17.679794", "step": 1026, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:17.715518", "step": 1026, "epoch": 1 }, { "type": "loss", "content": 0.021281026303768158, "timestamp": "2025-09-30 22:14:17.722737", "step": 1027, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:17.760909", "step": 1027, "epoch": 1 }, { "type": "loss", "content": 0.016101287677884102, "timestamp": "2025-09-30 22:14:17.794311", "step": 1028, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:17.829972", "step": 1028, "epoch": 1 }, { "type": "loss", "content": 0.029025474563241005, "timestamp": "2025-09-30 22:14:17.838718", "step": 1029, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:17.878883", "step": 1029, "epoch": 1 }, { "type": "loss", "content": 0.01193151529878378, "timestamp": "2025-09-30 22:14:17.891444", "step": 1030, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:17.934011", "step": 1030, "epoch": 1 }, { "type": "loss", "content": 0.016847174614667892, "timestamp": "2025-09-30 22:14:17.941620", "step": 1031, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:17.982090", "step": 1031, "epoch": 1 }, { "type": "loss", "content": 0.021021408960223198, "timestamp": "2025-09-30 22:14:18.013199", "step": 1032, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-30 22:14:18.045658", "step": 1032, "epoch": 1 }, { "type": "loss", "content": 0.030079005286097527, "timestamp": "2025-09-30 22:14:18.048279", "step": 1033, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:18.087168", "step": 1033, "epoch": 1 }, { "type": "loss", "content": 0.012578541412949562, "timestamp": "2025-09-30 22:14:18.094750", "step": 1034, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:18.137064", "step": 1034, "epoch": 1 }, { "type": "loss", "content": 0.008698937483131886, "timestamp": "2025-09-30 22:14:18.150393", "step": 1035, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:14:20.662055", "step": 1035, "epoch": 1 }, { "type": "pplx", "content": 5.4880656407917385, "timestamp": "2025-09-30 22:14:20.666086", "step": 1035, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:20.697900", "step": 1035, "epoch": 1 }, { "type": "loss", "content": 0.022248901426792145, "timestamp": "2025-09-30 22:14:20.725630", "step": 1036, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:20.759674", "step": 1036, "epoch": 1 }, { "type": "loss", "content": 0.018398774787783623, "timestamp": "2025-09-30 22:14:20.767564", "step": 1037, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:20.801646", "step": 1037, "epoch": 1 }, { "type": "loss", "content": 0.01914854720234871, "timestamp": "2025-09-30 22:14:20.809670", "step": 1038, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:20.842123", "step": 1038, "epoch": 1 }, { "type": "loss", "content": 0.013537915423512459, "timestamp": "2025-09-30 22:14:20.854234", "step": 1039, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:20.890682", "step": 1039, "epoch": 1 }, { "type": "loss", "content": 0.013754663057625294, "timestamp": "2025-09-30 22:14:20.924934", "step": 1040, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:20.958233", "step": 1040, "epoch": 1 }, { "type": "loss", "content": 0.015263702720403671, "timestamp": "2025-09-30 22:14:20.963757", "step": 1041, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:20.998954", "step": 1041, "epoch": 1 }, { "type": "loss", "content": 0.015004323795437813, "timestamp": "2025-09-30 22:14:21.006844", "step": 1042, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:21.040676", "step": 1042, "epoch": 1 }, { "type": "loss", "content": 0.009397340938448906, "timestamp": "2025-09-30 22:14:21.048606", "step": 1043, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:21.086414", "step": 1043, "epoch": 1 }, { "type": "loss", "content": 0.011286666616797447, "timestamp": "2025-09-30 22:14:21.119836", "step": 1044, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:14:21.161164", "step": 1044, "epoch": 1 }, { "type": "loss", "content": 0.008173218928277493, "timestamp": "2025-09-30 22:14:21.176278", "step": 1045, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:21.214987", "step": 1045, "epoch": 1 }, { "type": "loss", "content": 0.013183176517486572, "timestamp": "2025-09-30 22:14:21.228663", "step": 1046, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:21.261374", "step": 1046, "epoch": 1 }, { "type": "loss", "content": 0.020087797194719315, "timestamp": "2025-09-30 22:14:21.268268", "step": 1047, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:14:21.312141", "step": 1047, "epoch": 1 }, { "type": "loss", "content": 0.005610055290162563, "timestamp": "2025-09-30 22:14:21.349422", "step": 1048, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:21.382345", "step": 1048, "epoch": 1 }, { "type": "loss", "content": 0.01692376285791397, "timestamp": "2025-09-30 22:14:21.394954", "step": 1049, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:21.433831", "step": 1049, "epoch": 1 }, { "type": "loss", "content": 0.008124461397528648, "timestamp": "2025-09-30 22:14:21.447604", "step": 1050, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:21.490576", "step": 1050, "epoch": 1 }, { "type": "loss", "content": 0.014909453690052032, "timestamp": "2025-09-30 22:14:21.498319", "step": 1051, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:21.534871", "step": 1051, "epoch": 1 }, { "type": "loss", "content": 0.011585352011024952, "timestamp": "2025-09-30 22:14:21.569621", "step": 1052, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:21.602979", "step": 1052, "epoch": 1 }, { "type": "loss", "content": 0.020000595599412918, "timestamp": "2025-09-30 22:14:21.608170", "step": 1053, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:21.640600", "step": 1053, "epoch": 1 }, { "type": "loss", "content": 0.006880574394017458, "timestamp": "2025-09-30 22:14:21.648492", "step": 1054, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:21.681540", "step": 1054, "epoch": 1 }, { "type": "loss", "content": 0.01206766813993454, "timestamp": "2025-09-30 22:14:21.691869", "step": 1055, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:21.726364", "step": 1055, "epoch": 1 }, { "type": "loss", "content": 0.0255148746073246, "timestamp": "2025-09-30 22:14:21.759671", "step": 1056, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:14:21.800043", "step": 1056, "epoch": 1 }, { "type": "loss", "content": 0.02468947134912014, "timestamp": "2025-09-30 22:14:21.815889", "step": 1057, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:21.850739", "step": 1057, "epoch": 1 }, { "type": "loss", "content": 0.02892993949353695, "timestamp": "2025-09-30 22:14:21.861195", "step": 1058, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:21.894119", "step": 1058, "epoch": 1 }, { "type": "loss", "content": 0.018823428079485893, "timestamp": "2025-09-30 22:14:21.902073", "step": 1059, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:21.939819", "step": 1059, "epoch": 1 }, { "type": "loss", "content": 0.01066051796078682, "timestamp": "2025-09-30 22:14:21.968210", "step": 1060, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:22.003935", "step": 1060, "epoch": 1 }, { "type": "loss", "content": 0.015652211382985115, "timestamp": "2025-09-30 22:14:22.017042", "step": 1061, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:22.055062", "step": 1061, "epoch": 1 }, { "type": "loss", "content": 0.019767604768276215, "timestamp": "2025-09-30 22:14:22.061976", "step": 1062, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:22.097613", "step": 1062, "epoch": 1 }, { "type": "loss", "content": 0.039111003279685974, "timestamp": "2025-09-30 22:14:22.102027", "step": 1063, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:22.139543", "step": 1063, "epoch": 1 }, { "type": "loss", "content": 0.018683698028326035, "timestamp": "2025-09-30 22:14:22.171464", "step": 1064, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:22.209539", "step": 1064, "epoch": 1 }, { "type": "loss", "content": 0.010801514610648155, "timestamp": "2025-09-30 22:14:22.215028", "step": 1065, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:22.250031", "step": 1065, "epoch": 1 }, { "type": "loss", "content": 0.02404649555683136, "timestamp": "2025-09-30 22:14:22.260474", "step": 1066, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:22.293860", "step": 1066, "epoch": 1 }, { "type": "loss", "content": 0.03047175146639347, "timestamp": "2025-09-30 22:14:22.301612", "step": 1067, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:22.353397", "step": 1067, "epoch": 1 }, { "type": "loss", "content": 0.007441402412950993, "timestamp": "2025-09-30 22:14:22.387552", "step": 1068, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:22.422255", "step": 1068, "epoch": 1 }, { "type": "loss", "content": 0.02320132404565811, "timestamp": "2025-09-30 22:14:22.430206", "step": 1069, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:22.465093", "step": 1069, "epoch": 1 }, { "type": "loss", "content": 0.010687381029129028, "timestamp": "2025-09-30 22:14:22.472739", "step": 1070, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:22.508105", "step": 1070, "epoch": 1 }, { "type": "loss", "content": 0.012789708562195301, "timestamp": "2025-09-30 22:14:22.512548", "step": 1071, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:22.544388", "step": 1071, "epoch": 1 }, { "type": "loss", "content": 0.018357696011662483, "timestamp": "2025-09-30 22:14:22.575551", "step": 1072, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:22.610625", "step": 1072, "epoch": 1 }, { "type": "loss", "content": 0.031760621815919876, "timestamp": "2025-09-30 22:14:22.614326", "step": 1073, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:22.650908", "step": 1073, "epoch": 1 }, { "type": "loss", "content": 0.02676416002213955, "timestamp": "2025-09-30 22:14:22.657894", "step": 1074, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:14:22.691092", "step": 1074, "epoch": 1 }, { "type": "loss", "content": 0.07043999433517456, "timestamp": "2025-09-30 22:14:22.695307", "step": 1075, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:22.729537", "step": 1075, "epoch": 1 }, { "type": "loss", "content": 0.007570445071905851, "timestamp": "2025-09-30 22:14:22.763005", "step": 1076, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:14:22.795821", "step": 1076, "epoch": 1 }, { "type": "loss", "content": 0.015886131674051285, "timestamp": "2025-09-30 22:14:22.797984", "step": 1077, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:22.835782", "step": 1077, "epoch": 1 }, { "type": "loss", "content": 0.018651477992534637, "timestamp": "2025-09-30 22:14:22.842026", "step": 1078, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:22.873555", "step": 1078, "epoch": 1 }, { "type": "loss", "content": 0.01997782289981842, "timestamp": "2025-09-30 22:14:22.878077", "step": 1079, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:22.912966", "step": 1079, "epoch": 1 }, { "type": "loss", "content": 0.03567449748516083, "timestamp": "2025-09-30 22:14:22.941766", "step": 1080, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:22.975113", "step": 1080, "epoch": 1 }, { "type": "loss", "content": 0.01590901054441929, "timestamp": "2025-09-30 22:14:22.980351", "step": 1081, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:23.013375", "step": 1081, "epoch": 1 }, { "type": "loss", "content": 0.013110446743667126, "timestamp": "2025-09-30 22:14:23.020551", "step": 1082, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:23.052080", "step": 1082, "epoch": 1 }, { "type": "loss", "content": 0.014219080097973347, "timestamp": "2025-09-30 22:14:23.059653", "step": 1083, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:23.090820", "step": 1083, "epoch": 1 }, { "type": "loss", "content": 0.02950241044163704, "timestamp": "2025-09-30 22:14:23.118585", "step": 1084, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:23.151869", "step": 1084, "epoch": 1 }, { "type": "loss", "content": 0.025333222001791, "timestamp": "2025-09-30 22:14:23.156496", "step": 1085, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:23.192823", "step": 1085, "epoch": 1 }, { "type": "loss", "content": 0.03170863166451454, "timestamp": "2025-09-30 22:14:23.203879", "step": 1086, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:23.247110", "step": 1086, "epoch": 1 }, { "type": "loss", "content": 0.022048788145184517, "timestamp": "2025-09-30 22:14:23.259463", "step": 1087, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:23.297996", "step": 1087, "epoch": 1 }, { "type": "loss", "content": 0.014543569646775723, "timestamp": "2025-09-30 22:14:23.332845", "step": 1088, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:23.366483", "step": 1088, "epoch": 1 }, { "type": "loss", "content": 0.022739170119166374, "timestamp": "2025-09-30 22:14:23.376429", "step": 1089, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:23.412118", "step": 1089, "epoch": 1 }, { "type": "loss", "content": 0.01327939610928297, "timestamp": "2025-09-30 22:14:23.423364", "step": 1090, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:23.454545", "step": 1090, "epoch": 1 }, { "type": "loss", "content": 0.02299661375582218, "timestamp": "2025-09-30 22:14:23.465036", "step": 1091, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:23.497521", "step": 1091, "epoch": 1 }, { "type": "loss", "content": 0.04441500082612038, "timestamp": "2025-09-30 22:14:23.525608", "step": 1092, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:14:23.567168", "step": 1092, "epoch": 1 }, { "type": "loss", "content": 0.010146488435566425, "timestamp": "2025-09-30 22:14:23.582237", "step": 1093, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:23.624756", "step": 1093, "epoch": 1 }, { "type": "loss", "content": 0.00972510315477848, "timestamp": "2025-09-30 22:14:23.638585", "step": 1094, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:23.671340", "step": 1094, "epoch": 1 }, { "type": "loss", "content": 0.028590157628059387, "timestamp": "2025-09-30 22:14:23.681833", "step": 1095, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:23.717589", "step": 1095, "epoch": 1 }, { "type": "loss", "content": 0.014557278715074062, "timestamp": "2025-09-30 22:14:23.746408", "step": 1096, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:23.780576", "step": 1096, "epoch": 1 }, { "type": "loss", "content": 0.022265970706939697, "timestamp": "2025-09-30 22:14:23.785867", "step": 1097, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:23.820745", "step": 1097, "epoch": 1 }, { "type": "loss", "content": 0.025301869958639145, "timestamp": "2025-09-30 22:14:23.828313", "step": 1098, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:23.867633", "step": 1098, "epoch": 1 }, { "type": "loss", "content": 0.024350661784410477, "timestamp": "2025-09-30 22:14:23.881637", "step": 1099, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:23.914261", "step": 1099, "epoch": 1 }, { "type": "loss", "content": 0.016744161024689674, "timestamp": "2025-09-30 22:14:23.942857", "step": 1100, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:14:23.990841", "step": 1100, "epoch": 1 }, { "type": "loss", "content": 0.01173662394285202, "timestamp": "2025-09-30 22:14:24.007775", "step": 1101, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:24.045520", "step": 1101, "epoch": 1 }, { "type": "loss", "content": 0.007207825314253569, "timestamp": "2025-09-30 22:14:24.053476", "step": 1102, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:24.096192", "step": 1102, "epoch": 1 }, { "type": "loss", "content": 0.028237415477633476, "timestamp": "2025-09-30 22:14:24.103158", "step": 1103, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:24.138708", "step": 1103, "epoch": 1 }, { "type": "loss", "content": 0.014486867934465408, "timestamp": "2025-09-30 22:14:24.167034", "step": 1104, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:24.203458", "step": 1104, "epoch": 1 }, { "type": "loss", "content": 0.021397685632109642, "timestamp": "2025-09-30 22:14:24.209080", "step": 1105, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:24.240322", "step": 1105, "epoch": 1 }, { "type": "loss", "content": 0.01721569150686264, "timestamp": "2025-09-30 22:14:24.247564", "step": 1106, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:24.281449", "step": 1106, "epoch": 1 }, { "type": "loss", "content": 0.01823001727461815, "timestamp": "2025-09-30 22:14:24.293727", "step": 1107, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:24.334220", "step": 1107, "epoch": 1 }, { "type": "loss", "content": 0.01598893292248249, "timestamp": "2025-09-30 22:14:24.362286", "step": 1108, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:24.396173", "step": 1108, "epoch": 1 }, { "type": "loss", "content": 0.02451957017183304, "timestamp": "2025-09-30 22:14:24.401384", "step": 1109, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:24.434371", "step": 1109, "epoch": 1 }, { "type": "loss", "content": 0.01124908123165369, "timestamp": "2025-09-30 22:14:24.446933", "step": 1110, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:24.478816", "step": 1110, "epoch": 1 }, { "type": "loss", "content": 0.02102508395910263, "timestamp": "2025-09-30 22:14:24.489038", "step": 1111, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:24.520432", "step": 1111, "epoch": 1 }, { "type": "loss", "content": 0.0185481458902359, "timestamp": "2025-09-30 22:14:24.548176", "step": 1112, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:24.593310", "step": 1112, "epoch": 1 }, { "type": "loss", "content": 0.01727437786757946, "timestamp": "2025-09-30 22:14:24.606416", "step": 1113, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:24.641066", "step": 1113, "epoch": 1 }, { "type": "loss", "content": 0.017133377492427826, "timestamp": "2025-09-30 22:14:24.648033", "step": 1114, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:24.682384", "step": 1114, "epoch": 1 }, { "type": "loss", "content": 0.028735356405377388, "timestamp": "2025-09-30 22:14:24.692630", "step": 1115, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:24.733377", "step": 1115, "epoch": 1 }, { "type": "loss", "content": 0.03614767640829086, "timestamp": "2025-09-30 22:14:24.760995", "step": 1116, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:14:24.804627", "step": 1116, "epoch": 1 }, { "type": "loss", "content": 0.00506022060289979, "timestamp": "2025-09-30 22:14:24.820451", "step": 1117, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:24.855854", "step": 1117, "epoch": 1 }, { "type": "loss", "content": 0.025322485715150833, "timestamp": "2025-09-30 22:14:24.862801", "step": 1118, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:24.900363", "step": 1118, "epoch": 1 }, { "type": "loss", "content": 0.01264124270528555, "timestamp": "2025-09-30 22:14:24.912983", "step": 1119, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:24.957337", "step": 1119, "epoch": 1 }, { "type": "loss", "content": 0.03440183028578758, "timestamp": "2025-09-30 22:14:24.988650", "step": 1120, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:25.023799", "step": 1120, "epoch": 1 }, { "type": "loss", "content": 0.016403350979089737, "timestamp": "2025-09-30 22:14:25.037178", "step": 1121, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:25.075003", "step": 1121, "epoch": 1 }, { "type": "loss", "content": 0.007725847885012627, "timestamp": "2025-09-30 22:14:25.089002", "step": 1122, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:25.124913", "step": 1122, "epoch": 1 }, { "type": "loss", "content": 0.04689624905586243, "timestamp": "2025-09-30 22:14:25.137259", "step": 1123, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:25.174114", "step": 1123, "epoch": 1 }, { "type": "loss", "content": 0.03898750990629196, "timestamp": "2025-09-30 22:14:25.203193", "step": 1124, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:25.240590", "step": 1124, "epoch": 1 }, { "type": "loss", "content": 0.013921770267188549, "timestamp": "2025-09-30 22:14:25.253914", "step": 1125, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:25.288376", "step": 1125, "epoch": 1 }, { "type": "loss", "content": 0.012004831805825233, "timestamp": "2025-09-30 22:14:25.300924", "step": 1126, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:14:25.344075", "step": 1126, "epoch": 1 }, { "type": "loss", "content": 0.03051520325243473, "timestamp": "2025-09-30 22:14:25.359653", "step": 1127, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:25.393904", "step": 1127, "epoch": 1 }, { "type": "loss", "content": 0.025966256856918335, "timestamp": "2025-09-30 22:14:25.422752", "step": 1128, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:25.457467", "step": 1128, "epoch": 1 }, { "type": "loss", "content": 0.008847977966070175, "timestamp": "2025-09-30 22:14:25.470427", "step": 1129, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:25.506799", "step": 1129, "epoch": 1 }, { "type": "loss", "content": 0.011017872951924801, "timestamp": "2025-09-30 22:14:25.520608", "step": 1130, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:25.554943", "step": 1130, "epoch": 1 }, { "type": "loss", "content": 0.03611636534333229, "timestamp": "2025-09-30 22:14:25.562121", "step": 1131, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:25.596341", "step": 1131, "epoch": 1 }, { "type": "loss", "content": 0.014314553700387478, "timestamp": "2025-09-30 22:14:25.630509", "step": 1132, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:25.668815", "step": 1132, "epoch": 1 }, { "type": "loss", "content": 0.018850572407245636, "timestamp": "2025-09-30 22:14:25.679336", "step": 1133, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:25.718865", "step": 1133, "epoch": 1 }, { "type": "loss", "content": 0.02781030349433422, "timestamp": "2025-09-30 22:14:25.731201", "step": 1134, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:25.767129", "step": 1134, "epoch": 1 }, { "type": "loss", "content": 0.009797481819987297, "timestamp": "2025-09-30 22:14:25.778366", "step": 1135, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:25.816442", "step": 1135, "epoch": 1 }, { "type": "loss", "content": 0.02433471381664276, "timestamp": "2025-09-30 22:14:25.844584", "step": 1136, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:25.875939", "step": 1136, "epoch": 1 }, { "type": "loss", "content": 0.006771281361579895, "timestamp": "2025-09-30 22:14:25.880475", "step": 1137, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:25.914584", "step": 1137, "epoch": 1 }, { "type": "loss", "content": 0.012895852327346802, "timestamp": "2025-09-30 22:14:25.926821", "step": 1138, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:25.963894", "step": 1138, "epoch": 1 }, { "type": "loss", "content": 0.01526438444852829, "timestamp": "2025-09-30 22:14:25.975018", "step": 1139, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:26.012594", "step": 1139, "epoch": 1 }, { "type": "loss", "content": 0.029251690953969955, "timestamp": "2025-09-30 22:14:26.049276", "step": 1140, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:26.083490", "step": 1140, "epoch": 1 }, { "type": "loss", "content": 0.007598114665597677, "timestamp": "2025-09-30 22:14:26.096484", "step": 1141, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:26.136465", "step": 1141, "epoch": 1 }, { "type": "loss", "content": 0.011994223110377789, "timestamp": "2025-09-30 22:14:26.144054", "step": 1142, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:26.199170", "step": 1142, "epoch": 1 }, { "type": "loss", "content": 0.011340709403157234, "timestamp": "2025-09-30 22:14:26.211463", "step": 1143, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:26.255168", "step": 1143, "epoch": 1 }, { "type": "loss", "content": 0.018428653478622437, "timestamp": "2025-09-30 22:14:26.287209", "step": 1144, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:26.322571", "step": 1144, "epoch": 1 }, { "type": "loss", "content": 0.030467689037322998, "timestamp": "2025-09-30 22:14:26.328109", "step": 1145, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:26.363426", "step": 1145, "epoch": 1 }, { "type": "loss", "content": 0.018375704064965248, "timestamp": "2025-09-30 22:14:26.375743", "step": 1146, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:26.412068", "step": 1146, "epoch": 1 }, { "type": "loss", "content": 0.019979046657681465, "timestamp": "2025-09-30 22:14:26.419092", "step": 1147, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:26.452579", "step": 1147, "epoch": 1 }, { "type": "loss", "content": 0.023155096918344498, "timestamp": "2025-09-30 22:14:26.485760", "step": 1148, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:26.518356", "step": 1148, "epoch": 1 }, { "type": "loss", "content": 0.008853713050484657, "timestamp": "2025-09-30 22:14:26.527040", "step": 1149, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:26.561309", "step": 1149, "epoch": 1 }, { "type": "loss", "content": 0.020854298025369644, "timestamp": "2025-09-30 22:14:26.569089", "step": 1150, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:14:29.078713", "step": 1150, "epoch": 1 }, { "type": "pplx", "content": 5.631088735057543, "timestamp": "2025-09-30 22:14:29.081608", "step": 1150, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:29.120320", "step": 1150, "epoch": 1 }, { "type": "loss", "content": 0.02393251657485962, "timestamp": "2025-09-30 22:14:29.130476", "step": 1151, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:29.166702", "step": 1151, "epoch": 1 }, { "type": "loss", "content": 0.03351360186934471, "timestamp": "2025-09-30 22:14:29.198568", "step": 1152, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:14:29.243341", "step": 1152, "epoch": 1 }, { "type": "loss", "content": 0.005868937820196152, "timestamp": "2025-09-30 22:14:29.260017", "step": 1153, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:29.301274", "step": 1153, "epoch": 1 }, { "type": "loss", "content": 0.00861198641359806, "timestamp": "2025-09-30 22:14:29.314661", "step": 1154, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:29.353983", "step": 1154, "epoch": 1 }, { "type": "loss", "content": 0.012496614828705788, "timestamp": "2025-09-30 22:14:29.367678", "step": 1155, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:29.407525", "step": 1155, "epoch": 1 }, { "type": "loss", "content": 0.012300015427172184, "timestamp": "2025-09-30 22:14:29.441709", "step": 1156, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:29.479946", "step": 1156, "epoch": 1 }, { "type": "loss", "content": 0.009164150804281235, "timestamp": "2025-09-30 22:14:29.493085", "step": 1157, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:29.528327", "step": 1157, "epoch": 1 }, { "type": "loss", "content": 0.013321910984814167, "timestamp": "2025-09-30 22:14:29.540883", "step": 1158, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:29.574927", "step": 1158, "epoch": 1 }, { "type": "loss", "content": 0.013937913812696934, "timestamp": "2025-09-30 22:14:29.587268", "step": 1159, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:29.626438", "step": 1159, "epoch": 1 }, { "type": "loss", "content": 0.009495115838944912, "timestamp": "2025-09-30 22:14:29.661047", "step": 1160, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:14:29.700907", "step": 1160, "epoch": 1 }, { "type": "loss", "content": 0.01062469556927681, "timestamp": "2025-09-30 22:14:29.716571", "step": 1161, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:29.758185", "step": 1161, "epoch": 1 }, { "type": "loss", "content": 0.02869350276887417, "timestamp": "2025-09-30 22:14:29.769352", "step": 1162, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:29.805584", "step": 1162, "epoch": 1 }, { "type": "loss", "content": 0.009210729040205479, "timestamp": "2025-09-30 22:14:29.818147", "step": 1163, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:29.858283", "step": 1163, "epoch": 1 }, { "type": "loss", "content": 0.011261607520282269, "timestamp": "2025-09-30 22:14:29.892954", "step": 1164, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:29.935282", "step": 1164, "epoch": 1 }, { "type": "loss", "content": 0.020356422290205956, "timestamp": "2025-09-30 22:14:29.945868", "step": 1165, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:29.987048", "step": 1165, "epoch": 1 }, { "type": "loss", "content": 0.015439392998814583, "timestamp": "2025-09-30 22:14:29.999613", "step": 1166, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:30.034919", "step": 1166, "epoch": 1 }, { "type": "loss", "content": 0.016605112701654434, "timestamp": "2025-09-30 22:14:30.047465", "step": 1167, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:30.081251", "step": 1167, "epoch": 1 }, { "type": "loss", "content": 0.012671479023993015, "timestamp": "2025-09-30 22:14:30.114709", "step": 1168, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:30.149852", "step": 1168, "epoch": 1 }, { "type": "loss", "content": 0.012448888272047043, "timestamp": "2025-09-30 22:14:30.160565", "step": 1169, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:30.197732", "step": 1169, "epoch": 1 }, { "type": "loss", "content": 0.019691286608576775, "timestamp": "2025-09-30 22:14:30.211539", "step": 1170, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:14:30.254743", "step": 1170, "epoch": 1 }, { "type": "loss", "content": 0.0102019552141428, "timestamp": "2025-09-30 22:14:30.270309", "step": 1171, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:30.313658", "step": 1171, "epoch": 1 }, { "type": "loss", "content": 0.011992924846708775, "timestamp": "2025-09-30 22:14:30.348271", "step": 1172, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:14:30.392234", "step": 1172, "epoch": 1 }, { "type": "loss", "content": 0.008996584452688694, "timestamp": "2025-09-30 22:14:30.409569", "step": 1173, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:30.454367", "step": 1173, "epoch": 1 }, { "type": "loss", "content": 0.018789565190672874, "timestamp": "2025-09-30 22:14:30.467717", "step": 1174, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:30.501875", "step": 1174, "epoch": 1 }, { "type": "loss", "content": 0.01498965360224247, "timestamp": "2025-09-30 22:14:30.512971", "step": 1175, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:30.547409", "step": 1175, "epoch": 1 }, { "type": "loss", "content": 0.01690087839961052, "timestamp": "2025-09-30 22:14:30.580578", "step": 1176, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:30.623001", "step": 1176, "epoch": 1 }, { "type": "loss", "content": 0.014346581883728504, "timestamp": "2025-09-30 22:14:30.635623", "step": 1177, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:30.674179", "step": 1177, "epoch": 1 }, { "type": "loss", "content": 0.010005800984799862, "timestamp": "2025-09-30 22:14:30.687990", "step": 1178, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:30.726189", "step": 1178, "epoch": 1 }, { "type": "loss", "content": 0.01065401453524828, "timestamp": "2025-09-30 22:14:30.740030", "step": 1179, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:30.776887", "step": 1179, "epoch": 1 }, { "type": "loss", "content": 0.013994435779750347, "timestamp": "2025-09-30 22:14:30.811093", "step": 1180, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:30.842113", "step": 1180, "epoch": 1 }, { "type": "loss", "content": 0.015725145116448402, "timestamp": "2025-09-30 22:14:30.852237", "step": 1181, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:30.884891", "step": 1181, "epoch": 1 }, { "type": "loss", "content": 0.01210531685501337, "timestamp": "2025-09-30 22:14:30.896074", "step": 1182, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:30.933914", "step": 1182, "epoch": 1 }, { "type": "loss", "content": 0.018449081107974052, "timestamp": "2025-09-30 22:14:30.945014", "step": 1183, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:30.979632", "step": 1183, "epoch": 1 }, { "type": "loss", "content": 0.022223036736249924, "timestamp": "2025-09-30 22:14:31.012832", "step": 1184, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:31.045348", "step": 1184, "epoch": 1 }, { "type": "loss", "content": 0.013939021155238152, "timestamp": "2025-09-30 22:14:31.050217", "step": 1185, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:31.086672", "step": 1185, "epoch": 1 }, { "type": "loss", "content": 0.01341447327286005, "timestamp": "2025-09-30 22:14:31.098002", "step": 1186, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:14:31.144403", "step": 1186, "epoch": 1 }, { "type": "loss", "content": 0.019850490614771843, "timestamp": "2025-09-30 22:14:31.161418", "step": 1187, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:31.197931", "step": 1187, "epoch": 1 }, { "type": "loss", "content": 0.013188116252422333, "timestamp": "2025-09-30 22:14:31.231109", "step": 1188, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:31.266289", "step": 1188, "epoch": 1 }, { "type": "loss", "content": 0.015668513253331184, "timestamp": "2025-09-30 22:14:31.278951", "step": 1189, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:14:31.331133", "step": 1189, "epoch": 1 }, { "type": "loss", "content": 0.007118385750800371, "timestamp": "2025-09-30 22:14:31.346845", "step": 1190, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 544 ], "flops": 16136789420416 }, "timestamp": "2025-09-30 22:14:31.405785", "step": 1190, "epoch": 1 }, { "type": "loss", "content": 0.00617865938693285, "timestamp": "2025-09-30 22:14:31.424889", "step": 1191, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:31.461054", "step": 1191, "epoch": 1 }, { "type": "loss", "content": 0.026619745418429375, "timestamp": "2025-09-30 22:14:31.495860", "step": 1192, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:31.533161", "step": 1192, "epoch": 1 }, { "type": "loss", "content": 0.031537625938653946, "timestamp": "2025-09-30 22:14:31.543031", "step": 1193, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:31.575561", "step": 1193, "epoch": 1 }, { "type": "loss", "content": 0.01197479572147131, "timestamp": "2025-09-30 22:14:31.587903", "step": 1194, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:31.627188", "step": 1194, "epoch": 1 }, { "type": "loss", "content": 0.008407039567828178, "timestamp": "2025-09-30 22:14:31.640941", "step": 1195, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:31.679357", "step": 1195, "epoch": 1 }, { "type": "loss", "content": 0.011829300783574581, "timestamp": "2025-09-30 22:14:31.714096", "step": 1196, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:14:31.759203", "step": 1196, "epoch": 1 }, { "type": "loss", "content": 0.0072440290823578835, "timestamp": "2025-09-30 22:14:31.774582", "step": 1197, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:31.814995", "step": 1197, "epoch": 1 }, { "type": "loss", "content": 0.014621003530919552, "timestamp": "2025-09-30 22:14:31.828696", "step": 1198, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:31.865096", "step": 1198, "epoch": 1 }, { "type": "loss", "content": 0.014671620912849903, "timestamp": "2025-09-30 22:14:31.875936", "step": 1199, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:31.911932", "step": 1199, "epoch": 1 }, { "type": "loss", "content": 0.016262758523225784, "timestamp": "2025-09-30 22:14:31.945212", "step": 1200, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:14:31.995437", "step": 1200, "epoch": 1 }, { "type": "loss", "content": 0.017557835206389427, "timestamp": "2025-09-30 22:14:32.011241", "step": 1201, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:32.058041", "step": 1201, "epoch": 1 }, { "type": "loss", "content": 0.02594558708369732, "timestamp": "2025-09-30 22:14:32.068004", "step": 1202, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:32.105487", "step": 1202, "epoch": 1 }, { "type": "loss", "content": 0.014456644654273987, "timestamp": "2025-09-30 22:14:32.118885", "step": 1203, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:14:32.162977", "step": 1203, "epoch": 1 }, { "type": "loss", "content": 0.005826851818710566, "timestamp": "2025-09-30 22:14:32.199991", "step": 1204, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:32.238766", "step": 1204, "epoch": 1 }, { "type": "loss", "content": 0.01122973021119833, "timestamp": "2025-09-30 22:14:32.251887", "step": 1205, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:32.289668", "step": 1205, "epoch": 1 }, { "type": "loss", "content": 0.013107744045555592, "timestamp": "2025-09-30 22:14:32.302134", "step": 1206, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:14:32.352305", "step": 1206, "epoch": 1 }, { "type": "loss", "content": 0.009503448382019997, "timestamp": "2025-09-30 22:14:32.369615", "step": 1207, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:14:32.419930", "step": 1207, "epoch": 1 }, { "type": "loss", "content": 0.00695630582049489, "timestamp": "2025-09-30 22:14:32.454828", "step": 1208, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:32.491661", "step": 1208, "epoch": 1 }, { "type": "loss", "content": 0.02943275310099125, "timestamp": "2025-09-30 22:14:32.496914", "step": 1209, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:32.531863", "step": 1209, "epoch": 1 }, { "type": "loss", "content": 0.026188671588897705, "timestamp": "2025-09-30 22:14:32.545403", "step": 1210, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:32.582654", "step": 1210, "epoch": 1 }, { "type": "loss", "content": 0.02500862441956997, "timestamp": "2025-09-30 22:14:32.590451", "step": 1211, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:32.627491", "step": 1211, "epoch": 1 }, { "type": "loss", "content": 0.027926690876483917, "timestamp": "2025-09-30 22:14:32.656152", "step": 1212, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:32.697633", "step": 1212, "epoch": 1 }, { "type": "loss", "content": 0.02507833205163479, "timestamp": "2025-09-30 22:14:32.703018", "step": 1213, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:32.737873", "step": 1213, "epoch": 1 }, { "type": "loss", "content": 0.01936890184879303, "timestamp": "2025-09-30 22:14:32.745479", "step": 1214, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:32.782811", "step": 1214, "epoch": 1 }, { "type": "loss", "content": 0.02364104799926281, "timestamp": "2025-09-30 22:14:32.790444", "step": 1215, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:32.829715", "step": 1215, "epoch": 1 }, { "type": "loss", "content": 0.018656298518180847, "timestamp": "2025-09-30 22:14:32.860840", "step": 1216, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:32.897430", "step": 1216, "epoch": 1 }, { "type": "loss", "content": 0.030074406415224075, "timestamp": "2025-09-30 22:14:32.905252", "step": 1217, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:32.945989", "step": 1217, "epoch": 1 }, { "type": "loss", "content": 0.02442289888858795, "timestamp": "2025-09-30 22:14:32.956477", "step": 1218, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:32.994799", "step": 1218, "epoch": 1 }, { "type": "loss", "content": 0.0336812362074852, "timestamp": "2025-09-30 22:14:33.007365", "step": 1219, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:33.043199", "step": 1219, "epoch": 1 }, { "type": "loss", "content": 0.034401409327983856, "timestamp": "2025-09-30 22:14:33.072080", "step": 1220, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:33.107277", "step": 1220, "epoch": 1 }, { "type": "loss", "content": 0.035166334360837936, "timestamp": "2025-09-30 22:14:33.112603", "step": 1221, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:33.154645", "step": 1221, "epoch": 1 }, { "type": "loss", "content": 0.03142448142170906, "timestamp": "2025-09-30 22:14:33.165080", "step": 1222, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:33.201292", "step": 1222, "epoch": 1 }, { "type": "loss", "content": 0.025196803733706474, "timestamp": "2025-09-30 22:14:33.208397", "step": 1223, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:33.262664", "step": 1223, "epoch": 1 }, { "type": "loss", "content": 0.020468074828386307, "timestamp": "2025-09-30 22:14:33.290524", "step": 1224, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:33.324942", "step": 1224, "epoch": 1 }, { "type": "loss", "content": 0.013399248011410236, "timestamp": "2025-09-30 22:14:33.329252", "step": 1225, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:33.382658", "step": 1225, "epoch": 1 }, { "type": "loss", "content": 0.012052931822836399, "timestamp": "2025-09-30 22:14:33.390425", "step": 1226, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:33.438194", "step": 1226, "epoch": 1 }, { "type": "loss", "content": 0.02524169534444809, "timestamp": "2025-09-30 22:14:33.449895", "step": 1227, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:33.492781", "step": 1227, "epoch": 1 }, { "type": "loss", "content": 0.017910048365592957, "timestamp": "2025-09-30 22:14:33.521444", "step": 1228, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:33.566482", "step": 1228, "epoch": 1 }, { "type": "loss", "content": 0.02475748024880886, "timestamp": "2025-09-30 22:14:33.572577", "step": 1229, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:33.606170", "step": 1229, "epoch": 1 }, { "type": "loss", "content": 0.011676200665533543, "timestamp": "2025-09-30 22:14:33.618458", "step": 1230, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:33.656791", "step": 1230, "epoch": 1 }, { "type": "loss", "content": 0.02215118147432804, "timestamp": "2025-09-30 22:14:33.670176", "step": 1231, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:33.720673", "step": 1231, "epoch": 1 }, { "type": "loss", "content": 0.03192958980798721, "timestamp": "2025-09-30 22:14:33.756721", "step": 1232, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:33.804183", "step": 1232, "epoch": 1 }, { "type": "loss", "content": 0.01824454963207245, "timestamp": "2025-09-30 22:14:33.809803", "step": 1233, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:33.854045", "step": 1233, "epoch": 1 }, { "type": "loss", "content": 0.027758145704865456, "timestamp": "2025-09-30 22:14:33.862041", "step": 1234, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:33.899491", "step": 1234, "epoch": 1 }, { "type": "loss", "content": 0.0229659266769886, "timestamp": "2025-09-30 22:14:33.910584", "step": 1235, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:33.951116", "step": 1235, "epoch": 1 }, { "type": "loss", "content": 0.023478014394640923, "timestamp": "2025-09-30 22:14:33.982309", "step": 1236, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:34.022237", "step": 1236, "epoch": 1 }, { "type": "loss", "content": 0.037181347608566284, "timestamp": "2025-09-30 22:14:34.027942", "step": 1237, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:34.074477", "step": 1237, "epoch": 1 }, { "type": "loss", "content": 0.02985082007944584, "timestamp": "2025-09-30 22:14:34.081615", "step": 1238, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:34.116074", "step": 1238, "epoch": 1 }, { "type": "loss", "content": 0.018528126180171967, "timestamp": "2025-09-30 22:14:34.123241", "step": 1239, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:34.156297", "step": 1239, "epoch": 1 }, { "type": "loss", "content": 0.02232883870601654, "timestamp": "2025-09-30 22:14:34.189770", "step": 1240, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:34.223282", "step": 1240, "epoch": 1 }, { "type": "loss", "content": 0.015140527859330177, "timestamp": "2025-09-30 22:14:34.231488", "step": 1241, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:34.270300", "step": 1241, "epoch": 1 }, { "type": "loss", "content": 0.01367161888629198, "timestamp": "2025-09-30 22:14:34.277822", "step": 1242, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:34.316622", "step": 1242, "epoch": 1 }, { "type": "loss", "content": 0.020984509959816933, "timestamp": "2025-09-30 22:14:34.324075", "step": 1243, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:34.357271", "step": 1243, "epoch": 1 }, { "type": "loss", "content": 0.02360849268734455, "timestamp": "2025-09-30 22:14:34.386089", "step": 1244, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:34.420326", "step": 1244, "epoch": 1 }, { "type": "loss", "content": 0.022554300725460052, "timestamp": "2025-09-30 22:14:34.424955", "step": 1245, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:34.458835", "step": 1245, "epoch": 1 }, { "type": "loss", "content": 0.033704500645399094, "timestamp": "2025-09-30 22:14:34.466752", "step": 1246, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:34.502718", "step": 1246, "epoch": 1 }, { "type": "loss", "content": 0.03208060562610626, "timestamp": "2025-09-30 22:14:34.513758", "step": 1247, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:34.547728", "step": 1247, "epoch": 1 }, { "type": "loss", "content": 0.025324244052171707, "timestamp": "2025-09-30 22:14:34.578881", "step": 1248, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:34.619602", "step": 1248, "epoch": 1 }, { "type": "loss", "content": 0.02882443554699421, "timestamp": "2025-09-30 22:14:34.628351", "step": 1249, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:34.666145", "step": 1249, "epoch": 1 }, { "type": "loss", "content": 0.020039178431034088, "timestamp": "2025-09-30 22:14:34.673931", "step": 1250, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:34.705421", "step": 1250, "epoch": 1 }, { "type": "loss", "content": 0.031007476150989532, "timestamp": "2025-09-30 22:14:34.712649", "step": 1251, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:34.747486", "step": 1251, "epoch": 1 }, { "type": "loss", "content": 0.01620536856353283, "timestamp": "2025-09-30 22:14:34.776275", "step": 1252, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:34.812433", "step": 1252, "epoch": 1 }, { "type": "loss", "content": 0.0327889584004879, "timestamp": "2025-09-30 22:14:34.820569", "step": 1253, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:34.854146", "step": 1253, "epoch": 1 }, { "type": "loss", "content": 0.018718326464295387, "timestamp": "2025-09-30 22:14:34.864299", "step": 1254, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:34.902071", "step": 1254, "epoch": 1 }, { "type": "loss", "content": 0.013132144697010517, "timestamp": "2025-09-30 22:14:34.912528", "step": 1255, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:34.950600", "step": 1255, "epoch": 1 }, { "type": "loss", "content": 0.016166796907782555, "timestamp": "2025-09-30 22:14:34.982665", "step": 1256, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:35.017807", "step": 1256, "epoch": 1 }, { "type": "loss", "content": 0.04190756008028984, "timestamp": "2025-09-30 22:14:35.026037", "step": 1257, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:35.063925", "step": 1257, "epoch": 1 }, { "type": "loss", "content": 0.02226782590150833, "timestamp": "2025-09-30 22:14:35.075370", "step": 1258, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:35.110791", "step": 1258, "epoch": 1 }, { "type": "loss", "content": 0.015998445451259613, "timestamp": "2025-09-30 22:14:35.118812", "step": 1259, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:35.152127", "step": 1259, "epoch": 1 }, { "type": "loss", "content": 0.018795574083924294, "timestamp": "2025-09-30 22:14:35.184045", "step": 1260, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:35.220739", "step": 1260, "epoch": 1 }, { "type": "loss", "content": 0.014654216356575489, "timestamp": "2025-09-30 22:14:35.229523", "step": 1261, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:35.265657", "step": 1261, "epoch": 1 }, { "type": "loss", "content": 0.026398485526442528, "timestamp": "2025-09-30 22:14:35.278217", "step": 1262, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:35.311735", "step": 1262, "epoch": 1 }, { "type": "loss", "content": 0.02187102660536766, "timestamp": "2025-09-30 22:14:35.324343", "step": 1263, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:35.356785", "step": 1263, "epoch": 1 }, { "type": "loss", "content": 0.012795783579349518, "timestamp": "2025-09-30 22:14:35.388589", "step": 1264, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:35.425154", "step": 1264, "epoch": 1 }, { "type": "loss", "content": 0.019397811964154243, "timestamp": "2025-09-30 22:14:35.433992", "step": 1265, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:14:37.879632", "step": 1265, "epoch": 1 }, { "type": "pplx", "content": 5.495163605757066, "timestamp": "2025-09-30 22:14:37.884261", "step": 1265, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:37.922690", "step": 1265, "epoch": 1 }, { "type": "loss", "content": 0.01661515049636364, "timestamp": "2025-09-30 22:14:37.932598", "step": 1266, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:37.969570", "step": 1266, "epoch": 1 }, { "type": "loss", "content": 0.016776934266090393, "timestamp": "2025-09-30 22:14:37.981879", "step": 1267, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:38.014273", "step": 1267, "epoch": 1 }, { "type": "loss", "content": 0.026616569608449936, "timestamp": "2025-09-30 22:14:38.042927", "step": 1268, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:38.081740", "step": 1268, "epoch": 1 }, { "type": "loss", "content": 0.02219020016491413, "timestamp": "2025-09-30 22:14:38.094395", "step": 1269, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:38.129588", "step": 1269, "epoch": 1 }, { "type": "loss", "content": 0.016495492309331894, "timestamp": "2025-09-30 22:14:38.140702", "step": 1270, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:38.173871", "step": 1270, "epoch": 1 }, { "type": "loss", "content": 0.019733276218175888, "timestamp": "2025-09-30 22:14:38.186251", "step": 1271, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:38.223489", "step": 1271, "epoch": 1 }, { "type": "loss", "content": 0.016676638275384903, "timestamp": "2025-09-30 22:14:38.257710", "step": 1272, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:38.290270", "step": 1272, "epoch": 1 }, { "type": "loss", "content": 0.02926846593618393, "timestamp": "2025-09-30 22:14:38.298062", "step": 1273, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:38.332042", "step": 1273, "epoch": 1 }, { "type": "loss", "content": 0.018852192908525467, "timestamp": "2025-09-30 22:14:38.344616", "step": 1274, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:38.378349", "step": 1274, "epoch": 1 }, { "type": "loss", "content": 0.039707720279693604, "timestamp": "2025-09-30 22:14:38.390685", "step": 1275, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:38.426839", "step": 1275, "epoch": 1 }, { "type": "loss", "content": 0.022720344364643097, "timestamp": "2025-09-30 22:14:38.460267", "step": 1276, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:38.496673", "step": 1276, "epoch": 1 }, { "type": "loss", "content": 0.018529046326875687, "timestamp": "2025-09-30 22:14:38.509329", "step": 1277, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:38.547858", "step": 1277, "epoch": 1 }, { "type": "loss", "content": 0.010352366603910923, "timestamp": "2025-09-30 22:14:38.558888", "step": 1278, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:38.599350", "step": 1278, "epoch": 1 }, { "type": "loss", "content": 0.02025136537849903, "timestamp": "2025-09-30 22:14:38.613005", "step": 1279, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:38.658819", "step": 1279, "epoch": 1 }, { "type": "loss", "content": 0.043575845658779144, "timestamp": "2025-09-30 22:14:38.692118", "step": 1280, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:38.728383", "step": 1280, "epoch": 1 }, { "type": "loss", "content": 0.024662936106324196, "timestamp": "2025-09-30 22:14:38.736414", "step": 1281, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:38.772107", "step": 1281, "epoch": 1 }, { "type": "loss", "content": 0.037019431591033936, "timestamp": "2025-09-30 22:14:38.783355", "step": 1282, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:38.817932", "step": 1282, "epoch": 1 }, { "type": "loss", "content": 0.025818834081292152, "timestamp": "2025-09-30 22:14:38.830510", "step": 1283, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:38.865102", "step": 1283, "epoch": 1 }, { "type": "loss", "content": 0.023233827203512192, "timestamp": "2025-09-30 22:14:38.897257", "step": 1284, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:38.929162", "step": 1284, "epoch": 1 }, { "type": "loss", "content": 0.019260883331298828, "timestamp": "2025-09-30 22:14:38.939907", "step": 1285, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:38.978083", "step": 1285, "epoch": 1 }, { "type": "loss", "content": 0.03170757740736008, "timestamp": "2025-09-30 22:14:38.988566", "step": 1286, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:39.021346", "step": 1286, "epoch": 1 }, { "type": "loss", "content": 0.03846264258027077, "timestamp": "2025-09-30 22:14:39.028749", "step": 1287, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:39.066305", "step": 1287, "epoch": 1 }, { "type": "loss", "content": 0.019059956073760986, "timestamp": "2025-09-30 22:14:39.099473", "step": 1288, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:39.136954", "step": 1288, "epoch": 1 }, { "type": "loss", "content": 0.03577815741300583, "timestamp": "2025-09-30 22:14:39.145843", "step": 1289, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:39.181812", "step": 1289, "epoch": 1 }, { "type": "loss", "content": 0.014517645351588726, "timestamp": "2025-09-30 22:14:39.189757", "step": 1290, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:39.226189", "step": 1290, "epoch": 1 }, { "type": "loss", "content": 0.024149658158421516, "timestamp": "2025-09-30 22:14:39.238513", "step": 1291, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:39.272275", "step": 1291, "epoch": 1 }, { "type": "loss", "content": 0.020498551428318024, "timestamp": "2025-09-30 22:14:39.301028", "step": 1292, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:39.334014", "step": 1292, "epoch": 1 }, { "type": "loss", "content": 0.02780788764357567, "timestamp": "2025-09-30 22:14:39.344882", "step": 1293, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:39.376736", "step": 1293, "epoch": 1 }, { "type": "loss", "content": 0.0203973688185215, "timestamp": "2025-09-30 22:14:39.389145", "step": 1294, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:39.423129", "step": 1294, "epoch": 1 }, { "type": "loss", "content": 0.02105989307165146, "timestamp": "2025-09-30 22:14:39.435507", "step": 1295, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:39.471633", "step": 1295, "epoch": 1 }, { "type": "loss", "content": 0.03150323033332825, "timestamp": "2025-09-30 22:14:39.505139", "step": 1296, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:39.538556", "step": 1296, "epoch": 1 }, { "type": "loss", "content": 0.016306867823004723, "timestamp": "2025-09-30 22:14:39.551240", "step": 1297, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:39.584222", "step": 1297, "epoch": 1 }, { "type": "loss", "content": 0.02700851857662201, "timestamp": "2025-09-30 22:14:39.596869", "step": 1298, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:39.634537", "step": 1298, "epoch": 1 }, { "type": "loss", "content": 0.016827302053570747, "timestamp": "2025-09-30 22:14:39.645725", "step": 1299, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:39.678289", "step": 1299, "epoch": 1 }, { "type": "loss", "content": 0.017379000782966614, "timestamp": "2025-09-30 22:14:39.711492", "step": 1300, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:39.748075", "step": 1300, "epoch": 1 }, { "type": "loss", "content": 0.018542049452662468, "timestamp": "2025-09-30 22:14:39.760723", "step": 1301, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:39.800412", "step": 1301, "epoch": 1 }, { "type": "loss", "content": 0.05579542741179466, "timestamp": "2025-09-30 22:14:39.814118", "step": 1302, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:39.850470", "step": 1302, "epoch": 1 }, { "type": "loss", "content": 0.025843879207968712, "timestamp": "2025-09-30 22:14:39.863029", "step": 1303, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:39.898563", "step": 1303, "epoch": 1 }, { "type": "loss", "content": 0.03374844789505005, "timestamp": "2025-09-30 22:14:39.931997", "step": 1304, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:39.971672", "step": 1304, "epoch": 1 }, { "type": "loss", "content": 0.03356417641043663, "timestamp": "2025-09-30 22:14:39.980435", "step": 1305, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:40.020789", "step": 1305, "epoch": 1 }, { "type": "loss", "content": 0.0371815450489521, "timestamp": "2025-09-30 22:14:40.033381", "step": 1306, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:40.071666", "step": 1306, "epoch": 1 }, { "type": "loss", "content": 0.0254992563277483, "timestamp": "2025-09-30 22:14:40.084243", "step": 1307, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:40.124501", "step": 1307, "epoch": 1 }, { "type": "loss", "content": 0.023711930960416794, "timestamp": "2025-09-30 22:14:40.157656", "step": 1308, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:40.196953", "step": 1308, "epoch": 1 }, { "type": "loss", "content": 0.013582438230514526, "timestamp": "2025-09-30 22:14:40.202118", "step": 1309, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:40.236947", "step": 1309, "epoch": 1 }, { "type": "loss", "content": 0.022634677588939667, "timestamp": "2025-09-30 22:14:40.249257", "step": 1310, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:40.283455", "step": 1310, "epoch": 1 }, { "type": "loss", "content": 0.02410058304667473, "timestamp": "2025-09-30 22:14:40.295969", "step": 1311, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:40.330288", "step": 1311, "epoch": 1 }, { "type": "loss", "content": 0.027308452874422073, "timestamp": "2025-09-30 22:14:40.361513", "step": 1312, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:40.395542", "step": 1312, "epoch": 1 }, { "type": "loss", "content": 0.0207161046564579, "timestamp": "2025-09-30 22:14:40.403591", "step": 1313, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:40.439897", "step": 1313, "epoch": 1 }, { "type": "loss", "content": 0.023866860195994377, "timestamp": "2025-09-30 22:14:40.451974", "step": 1314, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:40.485646", "step": 1314, "epoch": 1 }, { "type": "loss", "content": 0.026835141703486443, "timestamp": "2025-09-30 22:14:40.492867", "step": 1315, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:40.528042", "step": 1315, "epoch": 1 }, { "type": "loss", "content": 0.01742064207792282, "timestamp": "2025-09-30 22:14:40.556549", "step": 1316, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:40.592346", "step": 1316, "epoch": 1 }, { "type": "loss", "content": 0.01809242181479931, "timestamp": "2025-09-30 22:14:40.597522", "step": 1317, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:40.633796", "step": 1317, "epoch": 1 }, { "type": "loss", "content": 0.010253416374325752, "timestamp": "2025-09-30 22:14:40.641422", "step": 1318, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:40.676059", "step": 1318, "epoch": 1 }, { "type": "loss", "content": 0.026114145293831825, "timestamp": "2025-09-30 22:14:40.683700", "step": 1319, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:40.720993", "step": 1319, "epoch": 1 }, { "type": "loss", "content": 0.031265173107385635, "timestamp": "2025-09-30 22:14:40.749599", "step": 1320, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:40.788316", "step": 1320, "epoch": 1 }, { "type": "loss", "content": 0.019768333062529564, "timestamp": "2025-09-30 22:14:40.794019", "step": 1321, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:40.826280", "step": 1321, "epoch": 1 }, { "type": "loss", "content": 0.019529767334461212, "timestamp": "2025-09-30 22:14:40.836695", "step": 1322, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:40.869249", "step": 1322, "epoch": 1 }, { "type": "loss", "content": 0.015189185738563538, "timestamp": "2025-09-30 22:14:40.875993", "step": 1323, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:40.909013", "step": 1323, "epoch": 1 }, { "type": "loss", "content": 0.016119806095957756, "timestamp": "2025-09-30 22:14:40.937565", "step": 1324, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:40.970539", "step": 1324, "epoch": 1 }, { "type": "loss", "content": 0.026351531967520714, "timestamp": "2025-09-30 22:14:40.975830", "step": 1325, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:14:41.015553", "step": 1325, "epoch": 1 }, { "type": "loss", "content": 0.025196142494678497, "timestamp": "2025-09-30 22:14:41.019633", "step": 1326, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:41.052832", "step": 1326, "epoch": 1 }, { "type": "loss", "content": 0.028438767418265343, "timestamp": "2025-09-30 22:14:41.063183", "step": 1327, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:41.100579", "step": 1327, "epoch": 1 }, { "type": "loss", "content": 0.015023917891085148, "timestamp": "2025-09-30 22:14:41.133774", "step": 1328, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:41.168955", "step": 1328, "epoch": 1 }, { "type": "loss", "content": 0.024687767028808594, "timestamp": "2025-09-30 22:14:41.182033", "step": 1329, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:41.215953", "step": 1329, "epoch": 1 }, { "type": "loss", "content": 0.025566160678863525, "timestamp": "2025-09-30 22:14:41.223160", "step": 1330, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:41.256592", "step": 1330, "epoch": 1 }, { "type": "loss", "content": 0.03352648764848709, "timestamp": "2025-09-30 22:14:41.268716", "step": 1331, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:41.306170", "step": 1331, "epoch": 1 }, { "type": "loss", "content": 0.01492286752909422, "timestamp": "2025-09-30 22:14:41.340370", "step": 1332, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:41.374577", "step": 1332, "epoch": 1 }, { "type": "loss", "content": 0.023383338004350662, "timestamp": "2025-09-30 22:14:41.383279", "step": 1333, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:41.424534", "step": 1333, "epoch": 1 }, { "type": "loss", "content": 0.04956074804067612, "timestamp": "2025-09-30 22:14:41.437901", "step": 1334, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:41.472351", "step": 1334, "epoch": 1 }, { "type": "loss", "content": 0.05456084758043289, "timestamp": "2025-09-30 22:14:41.484663", "step": 1335, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:41.518630", "step": 1335, "epoch": 1 }, { "type": "loss", "content": 0.020251190289855003, "timestamp": "2025-09-30 22:14:41.550794", "step": 1336, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:41.583031", "step": 1336, "epoch": 1 }, { "type": "loss", "content": 0.03886295482516289, "timestamp": "2025-09-30 22:14:41.591081", "step": 1337, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:41.625815", "step": 1337, "epoch": 1 }, { "type": "loss", "content": 0.010892719961702824, "timestamp": "2025-09-30 22:14:41.638365", "step": 1338, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:41.678044", "step": 1338, "epoch": 1 }, { "type": "loss", "content": 0.011457058601081371, "timestamp": "2025-09-30 22:14:41.690487", "step": 1339, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:41.724460", "step": 1339, "epoch": 1 }, { "type": "loss", "content": 0.016294119879603386, "timestamp": "2025-09-30 22:14:41.757673", "step": 1340, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:41.798916", "step": 1340, "epoch": 1 }, { "type": "loss", "content": 0.0155197037383914, "timestamp": "2025-09-30 22:14:41.807023", "step": 1341, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:41.852745", "step": 1341, "epoch": 1 }, { "type": "loss", "content": 0.024812180548906326, "timestamp": "2025-09-30 22:14:41.863821", "step": 1342, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:41.898943", "step": 1342, "epoch": 1 }, { "type": "loss", "content": 0.03152640536427498, "timestamp": "2025-09-30 22:14:41.909395", "step": 1343, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:41.948050", "step": 1343, "epoch": 1 }, { "type": "loss", "content": 0.030339188873767853, "timestamp": "2025-09-30 22:14:41.979127", "step": 1344, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:42.015722", "step": 1344, "epoch": 1 }, { "type": "loss", "content": 0.022944064810872078, "timestamp": "2025-09-30 22:14:42.025847", "step": 1345, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:42.062117", "step": 1345, "epoch": 1 }, { "type": "loss", "content": 0.027891971170902252, "timestamp": "2025-09-30 22:14:42.074425", "step": 1346, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:42.111761", "step": 1346, "epoch": 1 }, { "type": "loss", "content": 0.02328789047896862, "timestamp": "2025-09-30 22:14:42.122638", "step": 1347, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:42.163005", "step": 1347, "epoch": 1 }, { "type": "loss", "content": 0.03815057501196861, "timestamp": "2025-09-30 22:14:42.197692", "step": 1348, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:42.242963", "step": 1348, "epoch": 1 }, { "type": "loss", "content": 0.019010033458471298, "timestamp": "2025-09-30 22:14:42.253478", "step": 1349, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:42.286346", "step": 1349, "epoch": 1 }, { "type": "loss", "content": 0.016303110867738724, "timestamp": "2025-09-30 22:14:42.294189", "step": 1350, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:42.326820", "step": 1350, "epoch": 1 }, { "type": "loss", "content": 0.019749823957681656, "timestamp": "2025-09-30 22:14:42.339396", "step": 1351, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:42.388655", "step": 1351, "epoch": 1 }, { "type": "loss", "content": 0.024683495983481407, "timestamp": "2025-09-30 22:14:42.416958", "step": 1352, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:42.451188", "step": 1352, "epoch": 1 }, { "type": "loss", "content": 0.02669571153819561, "timestamp": "2025-09-30 22:14:42.459856", "step": 1353, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:42.500323", "step": 1353, "epoch": 1 }, { "type": "loss", "content": 0.02056029625236988, "timestamp": "2025-09-30 22:14:42.511373", "step": 1354, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:42.547536", "step": 1354, "epoch": 1 }, { "type": "loss", "content": 0.014292556792497635, "timestamp": "2025-09-30 22:14:42.554701", "step": 1355, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:42.589863", "step": 1355, "epoch": 1 }, { "type": "loss", "content": 0.030081013217568398, "timestamp": "2025-09-30 22:14:42.618177", "step": 1356, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:42.652620", "step": 1356, "epoch": 1 }, { "type": "loss", "content": 0.02174229733645916, "timestamp": "2025-09-30 22:14:42.657371", "step": 1357, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:42.691366", "step": 1357, "epoch": 1 }, { "type": "loss", "content": 0.027395816519856453, "timestamp": "2025-09-30 22:14:42.695884", "step": 1358, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:42.730620", "step": 1358, "epoch": 1 }, { "type": "loss", "content": 0.03220497816801071, "timestamp": "2025-09-30 22:14:42.737679", "step": 1359, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:42.770565", "step": 1359, "epoch": 1 }, { "type": "loss", "content": 0.024621393531560898, "timestamp": "2025-09-30 22:14:42.801843", "step": 1360, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:42.834826", "step": 1360, "epoch": 1 }, { "type": "loss", "content": 0.009342104196548462, "timestamp": "2025-09-30 22:14:42.842796", "step": 1361, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:42.875495", "step": 1361, "epoch": 1 }, { "type": "loss", "content": 0.03075343370437622, "timestamp": "2025-09-30 22:14:42.888029", "step": 1362, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:42.927529", "step": 1362, "epoch": 1 }, { "type": "loss", "content": 0.02079709991812706, "timestamp": "2025-09-30 22:14:42.941240", "step": 1363, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:42.986053", "step": 1363, "epoch": 1 }, { "type": "loss", "content": 0.01046017650514841, "timestamp": "2025-09-30 22:14:43.014860", "step": 1364, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:43.049961", "step": 1364, "epoch": 1 }, { "type": "loss", "content": 0.02314908802509308, "timestamp": "2025-09-30 22:14:43.065044", "step": 1365, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:43.107156", "step": 1365, "epoch": 1 }, { "type": "loss", "content": 0.018210845068097115, "timestamp": "2025-09-30 22:14:43.123947", "step": 1366, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:43.180868", "step": 1366, "epoch": 1 }, { "type": "loss", "content": 0.010480647906661034, "timestamp": "2025-09-30 22:14:43.192049", "step": 1367, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:43.225069", "step": 1367, "epoch": 1 }, { "type": "loss", "content": 0.02072896622121334, "timestamp": "2025-09-30 22:14:43.253146", "step": 1368, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:43.289303", "step": 1368, "epoch": 1 }, { "type": "loss", "content": 0.017621014267206192, "timestamp": "2025-09-30 22:14:43.297288", "step": 1369, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:43.341459", "step": 1369, "epoch": 1 }, { "type": "loss", "content": 0.02624642662703991, "timestamp": "2025-09-30 22:14:43.355294", "step": 1370, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:43.394559", "step": 1370, "epoch": 1 }, { "type": "loss", "content": 0.024825556203722954, "timestamp": "2025-09-30 22:14:43.402584", "step": 1371, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:43.437831", "step": 1371, "epoch": 1 }, { "type": "loss", "content": 0.026172012090682983, "timestamp": "2025-09-30 22:14:43.469777", "step": 1372, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:43.506668", "step": 1372, "epoch": 1 }, { "type": "loss", "content": 0.03473340719938278, "timestamp": "2025-09-30 22:14:43.515356", "step": 1373, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:43.565492", "step": 1373, "epoch": 1 }, { "type": "loss", "content": 0.021380873396992683, "timestamp": "2025-09-30 22:14:43.578114", "step": 1374, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:43.617474", "step": 1374, "epoch": 1 }, { "type": "loss", "content": 0.01798097975552082, "timestamp": "2025-09-30 22:14:43.627646", "step": 1375, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:43.664719", "step": 1375, "epoch": 1 }, { "type": "loss", "content": 0.010863942094147205, "timestamp": "2025-09-30 22:14:43.695929", "step": 1376, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:43.738635", "step": 1376, "epoch": 1 }, { "type": "loss", "content": 0.017268171533942223, "timestamp": "2025-09-30 22:14:43.749059", "step": 1377, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:43.788075", "step": 1377, "epoch": 1 }, { "type": "loss", "content": 0.013549873605370522, "timestamp": "2025-09-30 22:14:43.799274", "step": 1378, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:43.835508", "step": 1378, "epoch": 1 }, { "type": "loss", "content": 0.028922399505972862, "timestamp": "2025-09-30 22:14:43.842921", "step": 1379, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:43.888352", "step": 1379, "epoch": 1 }, { "type": "loss", "content": 0.03547342121601105, "timestamp": "2025-09-30 22:14:43.920201", "step": 1380, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:14:46.365049", "step": 1380, "epoch": 1 }, { "type": "pplx", "content": 5.286725956468357, "timestamp": "2025-09-30 22:14:46.367959", "step": 1380, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:46.404069", "step": 1380, "epoch": 1 }, { "type": "loss", "content": 0.03862202540040016, "timestamp": "2025-09-30 22:14:46.417036", "step": 1381, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:46.450342", "step": 1381, "epoch": 1 }, { "type": "loss", "content": 0.01243166346102953, "timestamp": "2025-09-30 22:14:46.458144", "step": 1382, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:46.493939", "step": 1382, "epoch": 1 }, { "type": "loss", "content": 0.029087301343679428, "timestamp": "2025-09-30 22:14:46.507613", "step": 1383, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:46.544961", "step": 1383, "epoch": 1 }, { "type": "loss", "content": 0.027522187680006027, "timestamp": "2025-09-30 22:14:46.579171", "step": 1384, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:46.622304", "step": 1384, "epoch": 1 }, { "type": "loss", "content": 0.023662393912672997, "timestamp": "2025-09-30 22:14:46.632103", "step": 1385, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:46.676339", "step": 1385, "epoch": 1 }, { "type": "loss", "content": 0.018998509272933006, "timestamp": "2025-09-30 22:14:46.689756", "step": 1386, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:46.729346", "step": 1386, "epoch": 1 }, { "type": "loss", "content": 0.012357273139059544, "timestamp": "2025-09-30 22:14:46.741469", "step": 1387, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:46.776033", "step": 1387, "epoch": 1 }, { "type": "loss", "content": 0.018847810104489326, "timestamp": "2025-09-30 22:14:46.807929", "step": 1388, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:46.842264", "step": 1388, "epoch": 1 }, { "type": "loss", "content": 0.016082987189292908, "timestamp": "2025-09-30 22:14:46.847053", "step": 1389, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:46.885966", "step": 1389, "epoch": 1 }, { "type": "loss", "content": 0.013551075011491776, "timestamp": "2025-09-30 22:14:46.893599", "step": 1390, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:46.932531", "step": 1390, "epoch": 1 }, { "type": "loss", "content": 0.03684048727154732, "timestamp": "2025-09-30 22:14:46.939847", "step": 1391, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:46.977409", "step": 1391, "epoch": 1 }, { "type": "loss", "content": 0.00784347578883171, "timestamp": "2025-09-30 22:14:47.005745", "step": 1392, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:47.040739", "step": 1392, "epoch": 1 }, { "type": "loss", "content": 0.012781267985701561, "timestamp": "2025-09-30 22:14:47.053380", "step": 1393, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:47.091382", "step": 1393, "epoch": 1 }, { "type": "loss", "content": 0.018491802737116814, "timestamp": "2025-09-30 22:14:47.102395", "step": 1394, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:47.135934", "step": 1394, "epoch": 1 }, { "type": "loss", "content": 0.01759738102555275, "timestamp": "2025-09-30 22:14:47.147046", "step": 1395, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:14:47.197583", "step": 1395, "epoch": 1 }, { "type": "loss", "content": 0.01280723512172699, "timestamp": "2025-09-30 22:14:47.232297", "step": 1396, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:47.273176", "step": 1396, "epoch": 1 }, { "type": "loss", "content": 0.021734340116381645, "timestamp": "2025-09-30 22:14:47.281804", "step": 1397, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:47.317166", "step": 1397, "epoch": 1 }, { "type": "loss", "content": 0.01839459501206875, "timestamp": "2025-09-30 22:14:47.328093", "step": 1398, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:47.362740", "step": 1398, "epoch": 1 }, { "type": "loss", "content": 0.02381458505988121, "timestamp": "2025-09-30 22:14:47.373081", "step": 1399, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:47.416305", "step": 1399, "epoch": 1 }, { "type": "loss", "content": 0.015866320580244064, "timestamp": "2025-09-30 22:14:47.447390", "step": 1400, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:47.480792", "step": 1400, "epoch": 1 }, { "type": "loss", "content": 0.011865449137985706, "timestamp": "2025-09-30 22:14:47.491301", "step": 1401, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:47.524867", "step": 1401, "epoch": 1 }, { "type": "loss", "content": 0.02674529328942299, "timestamp": "2025-09-30 22:14:47.531964", "step": 1402, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:47.570980", "step": 1402, "epoch": 1 }, { "type": "loss", "content": 0.016522124409675598, "timestamp": "2025-09-30 22:14:47.581363", "step": 1403, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:47.624708", "step": 1403, "epoch": 1 }, { "type": "loss", "content": 0.015584531240165234, "timestamp": "2025-09-30 22:14:47.653479", "step": 1404, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:47.693659", "step": 1404, "epoch": 1 }, { "type": "loss", "content": 0.024292299523949623, "timestamp": "2025-09-30 22:14:47.706285", "step": 1405, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:47.744131", "step": 1405, "epoch": 1 }, { "type": "loss", "content": 0.01535879261791706, "timestamp": "2025-09-30 22:14:47.752109", "step": 1406, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:47.798486", "step": 1406, "epoch": 1 }, { "type": "loss", "content": 0.01519247330725193, "timestamp": "2025-09-30 22:14:47.809629", "step": 1407, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:47.851854", "step": 1407, "epoch": 1 }, { "type": "loss", "content": 0.027948128059506416, "timestamp": "2025-09-30 22:14:47.885364", "step": 1408, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:47.926169", "step": 1408, "epoch": 1 }, { "type": "loss", "content": 0.04772356152534485, "timestamp": "2025-09-30 22:14:47.934966", "step": 1409, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:47.974889", "step": 1409, "epoch": 1 }, { "type": "loss", "content": 0.011634491384029388, "timestamp": "2025-09-30 22:14:47.987128", "step": 1410, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:48.024939", "step": 1410, "epoch": 1 }, { "type": "loss", "content": 0.04621616378426552, "timestamp": "2025-09-30 22:14:48.033222", "step": 1411, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:48.074759", "step": 1411, "epoch": 1 }, { "type": "loss", "content": 0.028265489265322685, "timestamp": "2025-09-30 22:14:48.105905", "step": 1412, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:48.147053", "step": 1412, "epoch": 1 }, { "type": "loss", "content": 0.018142448738217354, "timestamp": "2025-09-30 22:14:48.157535", "step": 1413, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:48.199585", "step": 1413, "epoch": 1 }, { "type": "loss", "content": 0.019383694976568222, "timestamp": "2025-09-30 22:14:48.211948", "step": 1414, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:48.249624", "step": 1414, "epoch": 1 }, { "type": "loss", "content": 0.016686297953128815, "timestamp": "2025-09-30 22:14:48.262951", "step": 1415, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:48.306134", "step": 1415, "epoch": 1 }, { "type": "loss", "content": 0.02211880125105381, "timestamp": "2025-09-30 22:14:48.338033", "step": 1416, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:48.373783", "step": 1416, "epoch": 1 }, { "type": "loss", "content": 0.01303754560649395, "timestamp": "2025-09-30 22:14:48.383084", "step": 1417, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:48.420102", "step": 1417, "epoch": 1 }, { "type": "loss", "content": 0.010236444883048534, "timestamp": "2025-09-30 22:14:48.431310", "step": 1418, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:48.476595", "step": 1418, "epoch": 1 }, { "type": "loss", "content": 0.028854433447122574, "timestamp": "2025-09-30 22:14:48.484170", "step": 1419, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:48.524885", "step": 1419, "epoch": 1 }, { "type": "loss", "content": 0.02037026733160019, "timestamp": "2025-09-30 22:14:48.556063", "step": 1420, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:48.594988", "step": 1420, "epoch": 1 }, { "type": "loss", "content": 0.0194195918738842, "timestamp": "2025-09-30 22:14:48.603650", "step": 1421, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:48.646552", "step": 1421, "epoch": 1 }, { "type": "loss", "content": 0.017392108216881752, "timestamp": "2025-09-30 22:14:48.654318", "step": 1422, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:48.698651", "step": 1422, "epoch": 1 }, { "type": "loss", "content": 0.021164512261748314, "timestamp": "2025-09-30 22:14:48.709651", "step": 1423, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:48.751784", "step": 1423, "epoch": 1 }, { "type": "loss", "content": 0.022570928558707237, "timestamp": "2025-09-30 22:14:48.779645", "step": 1424, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:48.817560", "step": 1424, "epoch": 1 }, { "type": "loss", "content": 0.021067731082439423, "timestamp": "2025-09-30 22:14:48.822464", "step": 1425, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:48.860132", "step": 1425, "epoch": 1 }, { "type": "loss", "content": 0.018480515107512474, "timestamp": "2025-09-30 22:14:48.867124", "step": 1426, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:14:48.902698", "step": 1426, "epoch": 1 }, { "type": "loss", "content": 0.010004106909036636, "timestamp": "2025-09-30 22:14:48.912820", "step": 1427, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:48.950185", "step": 1427, "epoch": 1 }, { "type": "loss", "content": 0.011603604070842266, "timestamp": "2025-09-30 22:14:48.981323", "step": 1428, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:49.022742", "step": 1428, "epoch": 1 }, { "type": "loss", "content": 0.01244189403951168, "timestamp": "2025-09-30 22:14:49.033237", "step": 1429, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:49.079495", "step": 1429, "epoch": 1 }, { "type": "loss", "content": 0.024397503584623337, "timestamp": "2025-09-30 22:14:49.090936", "step": 1430, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:14:49.125591", "step": 1430, "epoch": 1 }, { "type": "loss", "content": 0.019764477387070656, "timestamp": "2025-09-30 22:14:49.136417", "step": 1431, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:49.176049", "step": 1431, "epoch": 1 }, { "type": "loss", "content": 0.01619694009423256, "timestamp": "2025-09-30 22:14:49.203961", "step": 1432, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:14:49.239933", "step": 1432, "epoch": 1 }, { "type": "loss", "content": 0.024240665137767792, "timestamp": "2025-09-30 22:14:49.249496", "step": 1433, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:49.292665", "step": 1433, "epoch": 1 }, { "type": "loss", "content": 0.020648330450057983, "timestamp": "2025-09-30 22:14:49.306019", "step": 1434, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:49.342856", "step": 1434, "epoch": 1 }, { "type": "loss", "content": 0.01836603321135044, "timestamp": "2025-09-30 22:14:49.358454", "step": 1435, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:49.394412", "step": 1435, "epoch": 1 }, { "type": "loss", "content": 0.019589319825172424, "timestamp": "2025-09-30 22:14:49.425397", "step": 1436, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:49.462067", "step": 1436, "epoch": 1 }, { "type": "loss", "content": 0.0132905263453722, "timestamp": "2025-09-30 22:14:49.467811", "step": 1437, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:49.513656", "step": 1437, "epoch": 1 }, { "type": "loss", "content": 0.013705338351428509, "timestamp": "2025-09-30 22:14:49.526568", "step": 1438, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:49.561658", "step": 1438, "epoch": 1 }, { "type": "loss", "content": 0.007382605224847794, "timestamp": "2025-09-30 22:14:49.572692", "step": 1439, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:49.605953", "step": 1439, "epoch": 1 }, { "type": "loss", "content": 0.021737150847911835, "timestamp": "2025-09-30 22:14:49.634868", "step": 1440, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:49.680452", "step": 1440, "epoch": 1 }, { "type": "loss", "content": 0.022566504776477814, "timestamp": "2025-09-30 22:14:49.685634", "step": 1441, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:49.731781", "step": 1441, "epoch": 1 }, { "type": "loss", "content": 0.020912060514092445, "timestamp": "2025-09-30 22:14:49.739698", "step": 1442, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:49.776740", "step": 1442, "epoch": 1 }, { "type": "loss", "content": 0.02049718052148819, "timestamp": "2025-09-30 22:14:49.784392", "step": 1443, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:49.827083", "step": 1443, "epoch": 1 }, { "type": "loss", "content": 0.01551031693816185, "timestamp": "2025-09-30 22:14:49.860135", "step": 1444, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:49.896701", "step": 1444, "epoch": 1 }, { "type": "loss", "content": 0.026235654950141907, "timestamp": "2025-09-30 22:14:49.902310", "step": 1445, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:49.936472", "step": 1445, "epoch": 1 }, { "type": "loss", "content": 0.013224468566477299, "timestamp": "2025-09-30 22:14:49.944278", "step": 1446, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:49.982896", "step": 1446, "epoch": 1 }, { "type": "loss", "content": 0.01962916925549507, "timestamp": "2025-09-30 22:14:49.996254", "step": 1447, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:50.036035", "step": 1447, "epoch": 1 }, { "type": "loss", "content": 0.022626500576734543, "timestamp": "2025-09-30 22:14:50.064701", "step": 1448, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:50.105501", "step": 1448, "epoch": 1 }, { "type": "loss", "content": 0.027859408408403397, "timestamp": "2025-09-30 22:14:50.115635", "step": 1449, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:50.155662", "step": 1449, "epoch": 1 }, { "type": "loss", "content": 0.013107175007462502, "timestamp": "2025-09-30 22:14:50.168243", "step": 1450, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:14:50.208708", "step": 1450, "epoch": 1 }, { "type": "loss", "content": 0.010612815618515015, "timestamp": "2025-09-30 22:14:50.222118", "step": 1451, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:50.261565", "step": 1451, "epoch": 1 }, { "type": "loss", "content": 0.017334220930933952, "timestamp": "2025-09-30 22:14:50.289320", "step": 1452, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:14:50.324188", "step": 1452, "epoch": 1 }, { "type": "loss", "content": 0.0179128535091877, "timestamp": "2025-09-30 22:14:50.327415", "step": 1453, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:50.363052", "step": 1453, "epoch": 1 }, { "type": "loss", "content": 0.015791935846209526, "timestamp": "2025-09-30 22:14:50.375569", "step": 1454, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:50.418426", "step": 1454, "epoch": 1 }, { "type": "loss", "content": 0.022962335497140884, "timestamp": "2025-09-30 22:14:50.430170", "step": 1455, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:50.475264", "step": 1455, "epoch": 1 }, { "type": "loss", "content": 0.01597381755709648, "timestamp": "2025-09-30 22:14:50.507608", "step": 1456, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:50.540652", "step": 1456, "epoch": 1 }, { "type": "loss", "content": 0.02298254333436489, "timestamp": "2025-09-30 22:14:50.552374", "step": 1457, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:50.592361", "step": 1457, "epoch": 1 }, { "type": "loss", "content": 0.03316576033830643, "timestamp": "2025-09-30 22:14:50.605231", "step": 1458, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-30 22:14:50.641681", "step": 1458, "epoch": 1 }, { "type": "loss", "content": 0.048388849943876266, "timestamp": "2025-09-30 22:14:50.653407", "step": 1459, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:50.686171", "step": 1459, "epoch": 1 }, { "type": "loss", "content": 0.028634879738092422, "timestamp": "2025-09-30 22:14:50.714133", "step": 1460, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:50.755989", "step": 1460, "epoch": 1 }, { "type": "loss", "content": 0.018213100731372833, "timestamp": "2025-09-30 22:14:50.763160", "step": 1461, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:50.806069", "step": 1461, "epoch": 1 }, { "type": "loss", "content": 0.023431608453392982, "timestamp": "2025-09-30 22:14:50.814058", "step": 1462, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:50.851758", "step": 1462, "epoch": 1 }, { "type": "loss", "content": 0.014353444799780846, "timestamp": "2025-09-30 22:14:50.862888", "step": 1463, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:50.905329", "step": 1463, "epoch": 1 }, { "type": "loss", "content": 0.028549939393997192, "timestamp": "2025-09-30 22:14:50.934180", "step": 1464, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:50.968627", "step": 1464, "epoch": 1 }, { "type": "loss", "content": 0.016568686813116074, "timestamp": "2025-09-30 22:14:50.981827", "step": 1465, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:51.018628", "step": 1465, "epoch": 1 }, { "type": "loss", "content": 0.02902393415570259, "timestamp": "2025-09-30 22:14:51.026356", "step": 1466, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:51.063954", "step": 1466, "epoch": 1 }, { "type": "loss", "content": 0.01421070285141468, "timestamp": "2025-09-30 22:14:51.076566", "step": 1467, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:51.120548", "step": 1467, "epoch": 1 }, { "type": "loss", "content": 0.023018157109618187, "timestamp": "2025-09-30 22:14:51.153703", "step": 1468, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:51.190625", "step": 1468, "epoch": 1 }, { "type": "loss", "content": 0.03066314198076725, "timestamp": "2025-09-30 22:14:51.199382", "step": 1469, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:51.238666", "step": 1469, "epoch": 1 }, { "type": "loss", "content": 0.022856958210468292, "timestamp": "2025-09-30 22:14:51.249772", "step": 1470, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:51.282580", "step": 1470, "epoch": 1 }, { "type": "loss", "content": 0.018278757110238075, "timestamp": "2025-09-30 22:14:51.290309", "step": 1471, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:51.332739", "step": 1471, "epoch": 1 }, { "type": "loss", "content": 0.015473520383238792, "timestamp": "2025-09-30 22:14:51.361506", "step": 1472, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:51.397460", "step": 1472, "epoch": 1 }, { "type": "loss", "content": 0.029607480391860008, "timestamp": "2025-09-30 22:14:51.402376", "step": 1473, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:51.437797", "step": 1473, "epoch": 1 }, { "type": "loss", "content": 0.01510325912386179, "timestamp": "2025-09-30 22:14:51.448861", "step": 1474, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:51.487589", "step": 1474, "epoch": 1 }, { "type": "loss", "content": 0.02018461562693119, "timestamp": "2025-09-30 22:14:51.497693", "step": 1475, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:51.544161", "step": 1475, "epoch": 1 }, { "type": "loss", "content": 0.026856016367673874, "timestamp": "2025-09-30 22:14:51.577294", "step": 1476, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:51.611748", "step": 1476, "epoch": 1 }, { "type": "loss", "content": 0.026579078286886215, "timestamp": "2025-09-30 22:14:51.619703", "step": 1477, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:51.654763", "step": 1477, "epoch": 1 }, { "type": "loss", "content": 0.032938793301582336, "timestamp": "2025-09-30 22:14:51.662463", "step": 1478, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:51.695924", "step": 1478, "epoch": 1 }, { "type": "loss", "content": 0.025103628635406494, "timestamp": "2025-09-30 22:14:51.703825", "step": 1479, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:51.744172", "step": 1479, "epoch": 1 }, { "type": "loss", "content": 0.018662355840206146, "timestamp": "2025-09-30 22:14:51.775221", "step": 1480, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:51.815735", "step": 1480, "epoch": 1 }, { "type": "loss", "content": 0.028233205899596214, "timestamp": "2025-09-30 22:14:51.824452", "step": 1481, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:51.860848", "step": 1481, "epoch": 1 }, { "type": "loss", "content": 0.034671224653720856, "timestamp": "2025-09-30 22:14:51.871956", "step": 1482, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:51.909759", "step": 1482, "epoch": 1 }, { "type": "loss", "content": 0.010450835339725018, "timestamp": "2025-09-30 22:14:51.920796", "step": 1483, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:51.960392", "step": 1483, "epoch": 1 }, { "type": "loss", "content": 0.03715112432837486, "timestamp": "2025-09-30 22:14:51.995030", "step": 1484, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:52.035435", "step": 1484, "epoch": 1 }, { "type": "loss", "content": 0.026053503155708313, "timestamp": "2025-09-30 22:14:52.048462", "step": 1485, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:14:52.084696", "step": 1485, "epoch": 1 }, { "type": "loss", "content": 0.026194270700216293, "timestamp": "2025-09-30 22:14:52.091998", "step": 1486, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:14:52.136783", "step": 1486, "epoch": 1 }, { "type": "loss", "content": 0.010904263705015182, "timestamp": "2025-09-30 22:14:52.149360", "step": 1487, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:14:52.206340", "step": 1487, "epoch": 1 }, { "type": "loss", "content": 0.014208262786269188, "timestamp": "2025-09-30 22:14:52.244887", "step": 1488, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:52.281853", "step": 1488, "epoch": 1 }, { "type": "loss", "content": 0.01684318482875824, "timestamp": "2025-09-30 22:14:52.291660", "step": 1489, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:14:52.328868", "step": 1489, "epoch": 1 }, { "type": "loss", "content": 0.019845174625515938, "timestamp": "2025-09-30 22:14:52.341130", "step": 1490, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:52.381516", "step": 1490, "epoch": 1 }, { "type": "loss", "content": 0.018346065655350685, "timestamp": "2025-09-30 22:14:52.391742", "step": 1491, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:52.430278", "step": 1491, "epoch": 1 }, { "type": "loss", "content": 0.020569315180182457, "timestamp": "2025-09-30 22:14:52.461495", "step": 1492, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:14:52.497691", "step": 1492, "epoch": 1 }, { "type": "loss", "content": 0.03632209822535515, "timestamp": "2025-09-30 22:14:52.503283", "step": 1493, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:14:52.536043", "step": 1493, "epoch": 1 }, { "type": "loss", "content": 0.009968671016395092, "timestamp": "2025-09-30 22:14:52.543035", "step": 1494, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:52.577073", "step": 1494, "epoch": 1 }, { "type": "loss", "content": 0.01671568490564823, "timestamp": "2025-09-30 22:14:52.588133", "step": 1495, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:14:55.060162", "step": 1495, "epoch": 1 }, { "type": "pplx", "content": 5.379322825697576, "timestamp": "2025-09-30 22:14:55.062560", "step": 1495, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:14:55.098604", "step": 1495, "epoch": 1 }, { "type": "loss", "content": 0.021652352064847946, "timestamp": "2025-09-30 22:14:55.128525", "step": 1496, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:14:55.165493", "step": 1496, "epoch": 1 }, { "type": "loss", "content": 0.01600632071495056, "timestamp": "2025-09-30 22:14:55.178513", "step": 1497, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:55.214822", "step": 1497, "epoch": 1 }, { "type": "loss", "content": 0.023342151194810867, "timestamp": "2025-09-30 22:14:55.222394", "step": 1498, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:14:55.256101", "step": 1498, "epoch": 1 }, { "type": "loss", "content": 0.025111747905611992, "timestamp": "2025-09-30 22:14:55.263723", "step": 1499, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:14:55.312464", "step": 1499, "epoch": 1 }, { "type": "loss", "content": 0.015089130960404873, "timestamp": "2025-09-30 22:14:55.344332", "step": 1500, "epoch": 1 }, { "type": "info", "content": "Checkpoint saved at step 1500", "timestamp": "2025-09-30 22:15:00.722951", "step": 1500, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:00.758414", "step": 1500, "epoch": 1 }, { "type": "loss", "content": 0.01703370362520218, "timestamp": "2025-09-30 22:15:00.765556", "step": 1501, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:00.799722", "step": 1501, "epoch": 1 }, { "type": "loss", "content": 0.007595858536660671, "timestamp": "2025-09-30 22:15:00.812250", "step": 1502, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:00.852574", "step": 1502, "epoch": 1 }, { "type": "loss", "content": 0.010175961069762707, "timestamp": "2025-09-30 22:15:00.865827", "step": 1503, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:00.902350", "step": 1503, "epoch": 1 }, { "type": "loss", "content": 0.009721358306705952, "timestamp": "2025-09-30 22:15:00.933344", "step": 1504, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:00.967035", "step": 1504, "epoch": 1 }, { "type": "loss", "content": 0.009681370109319687, "timestamp": "2025-09-30 22:15:00.975783", "step": 1505, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:01.011856", "step": 1505, "epoch": 1 }, { "type": "loss", "content": 0.011668430641293526, "timestamp": "2025-09-30 22:15:01.023057", "step": 1506, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:01.056579", "step": 1506, "epoch": 1 }, { "type": "loss", "content": 0.030000098049640656, "timestamp": "2025-09-30 22:15:01.063787", "step": 1507, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:01.100682", "step": 1507, "epoch": 1 }, { "type": "loss", "content": 0.012646145187318325, "timestamp": "2025-09-30 22:15:01.129508", "step": 1508, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:01.164880", "step": 1508, "epoch": 1 }, { "type": "loss", "content": 0.006477945484220982, "timestamp": "2025-09-30 22:15:01.177937", "step": 1509, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:01.212380", "step": 1509, "epoch": 1 }, { "type": "loss", "content": 0.013549396768212318, "timestamp": "2025-09-30 22:15:01.223509", "step": 1510, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:01.257809", "step": 1510, "epoch": 1 }, { "type": "loss", "content": 0.02432546205818653, "timestamp": "2025-09-30 22:15:01.268977", "step": 1511, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:01.306775", "step": 1511, "epoch": 1 }, { "type": "loss", "content": 0.017720526084303856, "timestamp": "2025-09-30 22:15:01.338002", "step": 1512, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:01.369703", "step": 1512, "epoch": 1 }, { "type": "loss", "content": 0.006004958879202604, "timestamp": "2025-09-30 22:15:01.379666", "step": 1513, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:01.417717", "step": 1513, "epoch": 1 }, { "type": "loss", "content": 0.033136531710624695, "timestamp": "2025-09-30 22:15:01.430007", "step": 1514, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:01.464629", "step": 1514, "epoch": 1 }, { "type": "loss", "content": 0.028014734387397766, "timestamp": "2025-09-30 22:15:01.472430", "step": 1515, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:01.510144", "step": 1515, "epoch": 1 }, { "type": "loss", "content": 0.03069695271551609, "timestamp": "2025-09-30 22:15:01.542134", "step": 1516, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:01.582810", "step": 1516, "epoch": 1 }, { "type": "loss", "content": 0.015695618465542793, "timestamp": "2025-09-30 22:15:01.590867", "step": 1517, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:01.622988", "step": 1517, "epoch": 1 }, { "type": "loss", "content": 0.040362466126680374, "timestamp": "2025-09-30 22:15:01.630605", "step": 1518, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:01.666964", "step": 1518, "epoch": 1 }, { "type": "loss", "content": 0.02767367660999298, "timestamp": "2025-09-30 22:15:01.674185", "step": 1519, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:01.710346", "step": 1519, "epoch": 1 }, { "type": "loss", "content": 0.04135942831635475, "timestamp": "2025-09-30 22:15:01.739107", "step": 1520, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:01.776647", "step": 1520, "epoch": 1 }, { "type": "loss", "content": 0.026216236874461174, "timestamp": "2025-09-30 22:15:01.781834", "step": 1521, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:01.823340", "step": 1521, "epoch": 1 }, { "type": "loss", "content": 0.03001749888062477, "timestamp": "2025-09-30 22:15:01.835705", "step": 1522, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:01.874484", "step": 1522, "epoch": 1 }, { "type": "loss", "content": 0.019269688054919243, "timestamp": "2025-09-30 22:15:01.887783", "step": 1523, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:01.928177", "step": 1523, "epoch": 1 }, { "type": "loss", "content": 0.02227448858320713, "timestamp": "2025-09-30 22:15:01.956960", "step": 1524, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:01.994250", "step": 1524, "epoch": 1 }, { "type": "loss", "content": 0.02891368605196476, "timestamp": "2025-09-30 22:15:01.999631", "step": 1525, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:02.038187", "step": 1525, "epoch": 1 }, { "type": "loss", "content": 0.017416177317500114, "timestamp": "2025-09-30 22:15:02.045461", "step": 1526, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:02.077628", "step": 1526, "epoch": 1 }, { "type": "loss", "content": 0.02120266482234001, "timestamp": "2025-09-30 22:15:02.088476", "step": 1527, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:02.131866", "step": 1527, "epoch": 1 }, { "type": "loss", "content": 0.05429844185709953, "timestamp": "2025-09-30 22:15:02.166163", "step": 1528, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:02.200272", "step": 1528, "epoch": 1 }, { "type": "loss", "content": 0.0243589635938406, "timestamp": "2025-09-30 22:15:02.208258", "step": 1529, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:02.241422", "step": 1529, "epoch": 1 }, { "type": "loss", "content": 0.02745889127254486, "timestamp": "2025-09-30 22:15:02.252511", "step": 1530, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:02.287663", "step": 1530, "epoch": 1 }, { "type": "loss", "content": 0.025427240878343582, "timestamp": "2025-09-30 22:15:02.301380", "step": 1531, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:02.335046", "step": 1531, "epoch": 1 }, { "type": "loss", "content": 0.018605539575219154, "timestamp": "2025-09-30 22:15:02.368455", "step": 1532, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:02.406488", "step": 1532, "epoch": 1 }, { "type": "loss", "content": 0.01158945169299841, "timestamp": "2025-09-30 22:15:02.415333", "step": 1533, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:02.451096", "step": 1533, "epoch": 1 }, { "type": "loss", "content": 0.0385240763425827, "timestamp": "2025-09-30 22:15:02.463587", "step": 1534, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:02.500080", "step": 1534, "epoch": 1 }, { "type": "loss", "content": 0.021807149052619934, "timestamp": "2025-09-30 22:15:02.511301", "step": 1535, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:02.543570", "step": 1535, "epoch": 1 }, { "type": "loss", "content": 0.013656704686582088, "timestamp": "2025-09-30 22:15:02.577040", "step": 1536, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:02.616217", "step": 1536, "epoch": 1 }, { "type": "loss", "content": 0.013688210397958755, "timestamp": "2025-09-30 22:15:02.628896", "step": 1537, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:02.673505", "step": 1537, "epoch": 1 }, { "type": "loss", "content": 0.03932627663016319, "timestamp": "2025-09-30 22:15:02.685787", "step": 1538, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:02.723870", "step": 1538, "epoch": 1 }, { "type": "loss", "content": 0.016051799058914185, "timestamp": "2025-09-30 22:15:02.736412", "step": 1539, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:02.770999", "step": 1539, "epoch": 1 }, { "type": "loss", "content": 0.04664245992898941, "timestamp": "2025-09-30 22:15:02.804290", "step": 1540, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:02.836058", "step": 1540, "epoch": 1 }, { "type": "loss", "content": 0.02219100296497345, "timestamp": "2025-09-30 22:15:02.844255", "step": 1541, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:02.890175", "step": 1541, "epoch": 1 }, { "type": "loss", "content": 0.00859779305756092, "timestamp": "2025-09-30 22:15:02.903836", "step": 1542, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:02.945985", "step": 1542, "epoch": 1 }, { "type": "loss", "content": 0.020811183378100395, "timestamp": "2025-09-30 22:15:02.958366", "step": 1543, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:02.996540", "step": 1543, "epoch": 1 }, { "type": "loss", "content": 0.021285343915224075, "timestamp": "2025-09-30 22:15:03.029735", "step": 1544, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:03.066709", "step": 1544, "epoch": 1 }, { "type": "loss", "content": 0.007851026952266693, "timestamp": "2025-09-30 22:15:03.076558", "step": 1545, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:03.118425", "step": 1545, "epoch": 1 }, { "type": "loss", "content": 0.027574323117733, "timestamp": "2025-09-30 22:15:03.131719", "step": 1546, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:03.166377", "step": 1546, "epoch": 1 }, { "type": "loss", "content": 0.01619553565979004, "timestamp": "2025-09-30 22:15:03.178657", "step": 1547, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:03.217768", "step": 1547, "epoch": 1 }, { "type": "loss", "content": 0.02190890908241272, "timestamp": "2025-09-30 22:15:03.250975", "step": 1548, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:03.294776", "step": 1548, "epoch": 1 }, { "type": "loss", "content": 0.007822610437870026, "timestamp": "2025-09-30 22:15:03.307499", "step": 1549, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:03.348489", "step": 1549, "epoch": 1 }, { "type": "loss", "content": 0.011164495721459389, "timestamp": "2025-09-30 22:15:03.361161", "step": 1550, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:03.408154", "step": 1550, "epoch": 1 }, { "type": "loss", "content": 0.013010852038860321, "timestamp": "2025-09-30 22:15:03.427345", "step": 1551, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:03.469761", "step": 1551, "epoch": 1 }, { "type": "loss", "content": 0.022824978455901146, "timestamp": "2025-09-30 22:15:03.504051", "step": 1552, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:03.539602", "step": 1552, "epoch": 1 }, { "type": "loss", "content": 0.012585001066327095, "timestamp": "2025-09-30 22:15:03.550501", "step": 1553, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:03.596933", "step": 1553, "epoch": 1 }, { "type": "loss", "content": 0.021539170295000076, "timestamp": "2025-09-30 22:15:03.608113", "step": 1554, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:03.648220", "step": 1554, "epoch": 1 }, { "type": "loss", "content": 0.03153735771775246, "timestamp": "2025-09-30 22:15:03.661559", "step": 1555, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:03.694963", "step": 1555, "epoch": 1 }, { "type": "loss", "content": 0.02265406958758831, "timestamp": "2025-09-30 22:15:03.723833", "step": 1556, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:03.765045", "step": 1556, "epoch": 1 }, { "type": "loss", "content": 0.018682723864912987, "timestamp": "2025-09-30 22:15:03.773124", "step": 1557, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:03.811401", "step": 1557, "epoch": 1 }, { "type": "loss", "content": 0.01089040283113718, "timestamp": "2025-09-30 22:15:03.822520", "step": 1558, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:03.863937", "step": 1558, "epoch": 1 }, { "type": "loss", "content": 0.01570407673716545, "timestamp": "2025-09-30 22:15:03.877649", "step": 1559, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:03.911488", "step": 1559, "epoch": 1 }, { "type": "loss", "content": 0.01560614537447691, "timestamp": "2025-09-30 22:15:03.940293", "step": 1560, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:03.975964", "step": 1560, "epoch": 1 }, { "type": "loss", "content": 0.022726111114025116, "timestamp": "2025-09-30 22:15:03.984180", "step": 1561, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:04.022196", "step": 1561, "epoch": 1 }, { "type": "loss", "content": 0.011036788113415241, "timestamp": "2025-09-30 22:15:04.036241", "step": 1562, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:04.074640", "step": 1562, "epoch": 1 }, { "type": "loss", "content": 0.022821931168437004, "timestamp": "2025-09-30 22:15:04.086027", "step": 1563, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:04.117162", "step": 1563, "epoch": 1 }, { "type": "loss", "content": 0.028916530311107635, "timestamp": "2025-09-30 22:15:04.148549", "step": 1564, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:04.191703", "step": 1564, "epoch": 1 }, { "type": "loss", "content": 0.027913669124245644, "timestamp": "2025-09-30 22:15:04.202150", "step": 1565, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:04.237009", "step": 1565, "epoch": 1 }, { "type": "loss", "content": 0.02377433516085148, "timestamp": "2025-09-30 22:15:04.249335", "step": 1566, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:04.286365", "step": 1566, "epoch": 1 }, { "type": "loss", "content": 0.016784582287073135, "timestamp": "2025-09-30 22:15:04.299763", "step": 1567, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:04.334662", "step": 1567, "epoch": 1 }, { "type": "loss", "content": 0.037801824510097504, "timestamp": "2025-09-30 22:15:04.366069", "step": 1568, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:04.398070", "step": 1568, "epoch": 1 }, { "type": "loss", "content": 0.0208644550293684, "timestamp": "2025-09-30 22:15:04.406141", "step": 1569, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:04.442067", "step": 1569, "epoch": 1 }, { "type": "loss", "content": 0.012212934903800488, "timestamp": "2025-09-30 22:15:04.454595", "step": 1570, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:04.492559", "step": 1570, "epoch": 1 }, { "type": "loss", "content": 0.013333462178707123, "timestamp": "2025-09-30 22:15:04.503625", "step": 1571, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:04.543265", "step": 1571, "epoch": 1 }, { "type": "loss", "content": 0.02915300987660885, "timestamp": "2025-09-30 22:15:04.576761", "step": 1572, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:04.612123", "step": 1572, "epoch": 1 }, { "type": "loss", "content": 0.023643821477890015, "timestamp": "2025-09-30 22:15:04.622946", "step": 1573, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:04.660747", "step": 1573, "epoch": 1 }, { "type": "loss", "content": 0.015274593606591225, "timestamp": "2025-09-30 22:15:04.673126", "step": 1574, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:04.710632", "step": 1574, "epoch": 1 }, { "type": "loss", "content": 0.021839918568730354, "timestamp": "2025-09-30 22:15:04.722993", "step": 1575, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:04.763437", "step": 1575, "epoch": 1 }, { "type": "loss", "content": 0.007588080130517483, "timestamp": "2025-09-30 22:15:04.797980", "step": 1576, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:04.832980", "step": 1576, "epoch": 1 }, { "type": "loss", "content": 0.014518532902002335, "timestamp": "2025-09-30 22:15:04.845610", "step": 1577, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:04.896414", "step": 1577, "epoch": 1 }, { "type": "loss", "content": 0.0101570850238204, "timestamp": "2025-09-30 22:15:04.912257", "step": 1578, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:15:04.957017", "step": 1578, "epoch": 1 }, { "type": "loss", "content": 0.006863059010356665, "timestamp": "2025-09-30 22:15:04.972686", "step": 1579, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:05.015281", "step": 1579, "epoch": 1 }, { "type": "loss", "content": 0.03003445453941822, "timestamp": "2025-09-30 22:15:05.049792", "step": 1580, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:05.096118", "step": 1580, "epoch": 1 }, { "type": "loss", "content": 0.026673344895243645, "timestamp": "2025-09-30 22:15:05.104815", "step": 1581, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:05.144333", "step": 1581, "epoch": 1 }, { "type": "loss", "content": 0.028324894607067108, "timestamp": "2025-09-30 22:15:05.155446", "step": 1582, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:05.192601", "step": 1582, "epoch": 1 }, { "type": "loss", "content": 0.01850230246782303, "timestamp": "2025-09-30 22:15:05.203181", "step": 1583, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:05.240604", "step": 1583, "epoch": 1 }, { "type": "loss", "content": 0.014605790376663208, "timestamp": "2025-09-30 22:15:05.274038", "step": 1584, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:05.316147", "step": 1584, "epoch": 1 }, { "type": "loss", "content": 0.01580674760043621, "timestamp": "2025-09-30 22:15:05.324007", "step": 1585, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:05.358589", "step": 1585, "epoch": 1 }, { "type": "loss", "content": 0.015010936185717583, "timestamp": "2025-09-30 22:15:05.370937", "step": 1586, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:05.404407", "step": 1586, "epoch": 1 }, { "type": "loss", "content": 0.04198862239718437, "timestamp": "2025-09-30 22:15:05.414748", "step": 1587, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:05.452183", "step": 1587, "epoch": 1 }, { "type": "loss", "content": 0.010779457166790962, "timestamp": "2025-09-30 22:15:05.485613", "step": 1588, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:05.524310", "step": 1588, "epoch": 1 }, { "type": "loss", "content": 0.037828974425792694, "timestamp": "2025-09-30 22:15:05.536946", "step": 1589, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:05.571561", "step": 1589, "epoch": 1 }, { "type": "loss", "content": 0.02178335376083851, "timestamp": "2025-09-30 22:15:05.582544", "step": 1590, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:05.617260", "step": 1590, "epoch": 1 }, { "type": "loss", "content": 0.01142160128802061, "timestamp": "2025-09-30 22:15:05.624741", "step": 1591, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:05.658317", "step": 1591, "epoch": 1 }, { "type": "loss", "content": 0.018499299883842468, "timestamp": "2025-09-30 22:15:05.689446", "step": 1592, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:05.732066", "step": 1592, "epoch": 1 }, { "type": "loss", "content": 0.030043240636587143, "timestamp": "2025-09-30 22:15:05.740822", "step": 1593, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:05.774048", "step": 1593, "epoch": 1 }, { "type": "loss", "content": 0.013827769085764885, "timestamp": "2025-09-30 22:15:05.786361", "step": 1594, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:05.825080", "step": 1594, "epoch": 1 }, { "type": "loss", "content": 0.021424157544970512, "timestamp": "2025-09-30 22:15:05.838944", "step": 1595, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:05.876781", "step": 1595, "epoch": 1 }, { "type": "loss", "content": 0.020284688100218773, "timestamp": "2025-09-30 22:15:05.906188", "step": 1596, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:05.956963", "step": 1596, "epoch": 1 }, { "type": "loss", "content": 0.013254090212285519, "timestamp": "2025-09-30 22:15:05.966961", "step": 1597, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:06.016279", "step": 1597, "epoch": 1 }, { "type": "loss", "content": 0.01100999303162098, "timestamp": "2025-09-30 22:15:06.030049", "step": 1598, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:06.072696", "step": 1598, "epoch": 1 }, { "type": "loss", "content": 0.018316803500056267, "timestamp": "2025-09-30 22:15:06.083007", "step": 1599, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:06.125635", "step": 1599, "epoch": 1 }, { "type": "loss", "content": 0.02024856209754944, "timestamp": "2025-09-30 22:15:06.158837", "step": 1600, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:06.202958", "step": 1600, "epoch": 1 }, { "type": "loss", "content": 0.01697261445224285, "timestamp": "2025-09-30 22:15:06.216298", "step": 1601, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:06.252496", "step": 1601, "epoch": 1 }, { "type": "loss", "content": 0.01887323521077633, "timestamp": "2025-09-30 22:15:06.265066", "step": 1602, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:06.308840", "step": 1602, "epoch": 1 }, { "type": "loss", "content": 0.02012869343161583, "timestamp": "2025-09-30 22:15:06.322235", "step": 1603, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:06.362188", "step": 1603, "epoch": 1 }, { "type": "loss", "content": 0.01736818440258503, "timestamp": "2025-09-30 22:15:06.396499", "step": 1604, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:06.431925", "step": 1604, "epoch": 1 }, { "type": "loss", "content": 0.0334581658244133, "timestamp": "2025-09-30 22:15:06.442444", "step": 1605, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:06.481912", "step": 1605, "epoch": 1 }, { "type": "loss", "content": 0.012905096635222435, "timestamp": "2025-09-30 22:15:06.495698", "step": 1606, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:06.544382", "step": 1606, "epoch": 1 }, { "type": "loss", "content": 0.01980365253984928, "timestamp": "2025-09-30 22:15:06.557736", "step": 1607, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:06.606310", "step": 1607, "epoch": 1 }, { "type": "loss", "content": 0.013451996259391308, "timestamp": "2025-09-30 22:15:06.639373", "step": 1608, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:06.687176", "step": 1608, "epoch": 1 }, { "type": "loss", "content": 0.029307778924703598, "timestamp": "2025-09-30 22:15:06.692827", "step": 1609, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:06.746176", "step": 1609, "epoch": 1 }, { "type": "loss", "content": 0.027767712250351906, "timestamp": "2025-09-30 22:15:06.757308", "step": 1610, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:15:09.347425", "step": 1610, "epoch": 1 }, { "type": "pplx", "content": 5.567295370613334, "timestamp": "2025-09-30 22:15:09.349820", "step": 1610, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:09.379989", "step": 1610, "epoch": 1 }, { "type": "loss", "content": 0.026089852675795555, "timestamp": "2025-09-30 22:15:09.392442", "step": 1611, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:15:09.435847", "step": 1611, "epoch": 1 }, { "type": "loss", "content": 0.018595224246382713, "timestamp": "2025-09-30 22:15:09.472876", "step": 1612, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:09.508124", "step": 1612, "epoch": 1 }, { "type": "loss", "content": 0.01098089013248682, "timestamp": "2025-09-30 22:15:09.520785", "step": 1613, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:15:09.570952", "step": 1613, "epoch": 1 }, { "type": "loss", "content": 0.013269396498799324, "timestamp": "2025-09-30 22:15:09.586510", "step": 1614, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:09.629996", "step": 1614, "epoch": 1 }, { "type": "loss", "content": 0.015588633716106415, "timestamp": "2025-09-30 22:15:09.642200", "step": 1615, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:09.675728", "step": 1615, "epoch": 1 }, { "type": "loss", "content": 0.012610826641321182, "timestamp": "2025-09-30 22:15:09.709174", "step": 1616, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:09.745424", "step": 1616, "epoch": 1 }, { "type": "loss", "content": 0.0474841482937336, "timestamp": "2025-09-30 22:15:09.753326", "step": 1617, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:09.793672", "step": 1617, "epoch": 1 }, { "type": "loss", "content": 0.02974524535238743, "timestamp": "2025-09-30 22:15:09.806017", "step": 1618, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:09.846413", "step": 1618, "epoch": 1 }, { "type": "loss", "content": 0.024764923378825188, "timestamp": "2025-09-30 22:15:09.860135", "step": 1619, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:15:09.892123", "step": 1619, "epoch": 1 }, { "type": "loss", "content": 0.041059788316488266, "timestamp": "2025-09-30 22:15:09.917495", "step": 1620, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:15:09.967799", "step": 1620, "epoch": 1 }, { "type": "loss", "content": 0.04249605908989906, "timestamp": "2025-09-30 22:15:09.982914", "step": 1621, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:15:10.014911", "step": 1621, "epoch": 1 }, { "type": "loss", "content": 0.040544331073760986, "timestamp": "2025-09-30 22:15:10.019407", "step": 1622, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:15:10.066393", "step": 1622, "epoch": 1 }, { "type": "loss", "content": 0.018109586089849472, "timestamp": "2025-09-30 22:15:10.082479", "step": 1623, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:10.126180", "step": 1623, "epoch": 1 }, { "type": "loss", "content": 0.016633780673146248, "timestamp": "2025-09-30 22:15:10.153788", "step": 1624, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:10.188864", "step": 1624, "epoch": 1 }, { "type": "loss", "content": 0.018415579572319984, "timestamp": "2025-09-30 22:15:10.197659", "step": 1625, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:10.233300", "step": 1625, "epoch": 1 }, { "type": "loss", "content": 0.02845713496208191, "timestamp": "2025-09-30 22:15:10.244430", "step": 1626, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:10.278719", "step": 1626, "epoch": 1 }, { "type": "loss", "content": 0.029067419469356537, "timestamp": "2025-09-30 22:15:10.289307", "step": 1627, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:10.332600", "step": 1627, "epoch": 1 }, { "type": "loss", "content": 0.018506374210119247, "timestamp": "2025-09-30 22:15:10.365800", "step": 1628, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:10.398468", "step": 1628, "epoch": 1 }, { "type": "loss", "content": 0.03130076453089714, "timestamp": "2025-09-30 22:15:10.403394", "step": 1629, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:10.458144", "step": 1629, "epoch": 1 }, { "type": "loss", "content": 0.018053654581308365, "timestamp": "2025-09-30 22:15:10.470461", "step": 1630, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:10.521124", "step": 1630, "epoch": 1 }, { "type": "loss", "content": 0.013748985715210438, "timestamp": "2025-09-30 22:15:10.532348", "step": 1631, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:10.572156", "step": 1631, "epoch": 1 }, { "type": "loss", "content": 0.014565156772732735, "timestamp": "2025-09-30 22:15:10.601102", "step": 1632, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:15:10.634696", "step": 1632, "epoch": 1 }, { "type": "loss", "content": 0.05316678807139397, "timestamp": "2025-09-30 22:15:10.646155", "step": 1633, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:10.681703", "step": 1633, "epoch": 1 }, { "type": "loss", "content": 0.02041662111878395, "timestamp": "2025-09-30 22:15:10.694079", "step": 1634, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:10.742607", "step": 1634, "epoch": 1 }, { "type": "loss", "content": 0.017785055562853813, "timestamp": "2025-09-30 22:15:10.758527", "step": 1635, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:10.793464", "step": 1635, "epoch": 1 }, { "type": "loss", "content": 0.012598762288689613, "timestamp": "2025-09-30 22:15:10.821246", "step": 1636, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:10.861028", "step": 1636, "epoch": 1 }, { "type": "loss", "content": 0.01525981817394495, "timestamp": "2025-09-30 22:15:10.871535", "step": 1637, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:10.903740", "step": 1637, "epoch": 1 }, { "type": "loss", "content": 0.03392323479056358, "timestamp": "2025-09-30 22:15:10.916056", "step": 1638, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:10.953982", "step": 1638, "epoch": 1 }, { "type": "loss", "content": 0.024084903299808502, "timestamp": "2025-09-30 22:15:10.964510", "step": 1639, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:11.006336", "step": 1639, "epoch": 1 }, { "type": "loss", "content": 0.03673992678523064, "timestamp": "2025-09-30 22:15:11.040952", "step": 1640, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:11.105431", "step": 1640, "epoch": 1 }, { "type": "loss", "content": 0.027965785935521126, "timestamp": "2025-09-30 22:15:11.114035", "step": 1641, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:11.147881", "step": 1641, "epoch": 1 }, { "type": "loss", "content": 0.021707849577069283, "timestamp": "2025-09-30 22:15:11.160273", "step": 1642, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:11.199425", "step": 1642, "epoch": 1 }, { "type": "loss", "content": 0.020364973694086075, "timestamp": "2025-09-30 22:15:11.211979", "step": 1643, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:11.246269", "step": 1643, "epoch": 1 }, { "type": "loss", "content": 0.012984007596969604, "timestamp": "2025-09-30 22:15:11.279461", "step": 1644, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:11.320454", "step": 1644, "epoch": 1 }, { "type": "loss", "content": 0.018483446910977364, "timestamp": "2025-09-30 22:15:11.333494", "step": 1645, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:11.373288", "step": 1645, "epoch": 1 }, { "type": "loss", "content": 0.03060707449913025, "timestamp": "2025-09-30 22:15:11.386629", "step": 1646, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:11.430698", "step": 1646, "epoch": 1 }, { "type": "loss", "content": 0.013780564069747925, "timestamp": "2025-09-30 22:15:11.444058", "step": 1647, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:11.484199", "step": 1647, "epoch": 1 }, { "type": "loss", "content": 0.00868815928697586, "timestamp": "2025-09-30 22:15:11.517443", "step": 1648, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:11.554542", "step": 1648, "epoch": 1 }, { "type": "loss", "content": 0.017985939979553223, "timestamp": "2025-09-30 22:15:11.564519", "step": 1649, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:11.597091", "step": 1649, "epoch": 1 }, { "type": "loss", "content": 0.008981360122561455, "timestamp": "2025-09-30 22:15:11.607558", "step": 1650, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:11.649241", "step": 1650, "epoch": 1 }, { "type": "loss", "content": 0.026895396411418915, "timestamp": "2025-09-30 22:15:11.656780", "step": 1651, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:11.701964", "step": 1651, "epoch": 1 }, { "type": "loss", "content": 0.016994664445519447, "timestamp": "2025-09-30 22:15:11.733066", "step": 1652, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:11.768252", "step": 1652, "epoch": 1 }, { "type": "loss", "content": 0.0100490041077137, "timestamp": "2025-09-30 22:15:11.781235", "step": 1653, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:11.817627", "step": 1653, "epoch": 1 }, { "type": "loss", "content": 0.029902230948209763, "timestamp": "2025-09-30 22:15:11.824879", "step": 1654, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:11.867331", "step": 1654, "epoch": 1 }, { "type": "loss", "content": 0.026958175003528595, "timestamp": "2025-09-30 22:15:11.881006", "step": 1655, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:11.915803", "step": 1655, "epoch": 1 }, { "type": "loss", "content": 0.02336595207452774, "timestamp": "2025-09-30 22:15:11.944464", "step": 1656, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:11.984098", "step": 1656, "epoch": 1 }, { "type": "loss", "content": 0.0225741695612669, "timestamp": "2025-09-30 22:15:11.991907", "step": 1657, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:12.029238", "step": 1657, "epoch": 1 }, { "type": "loss", "content": 0.018445616587996483, "timestamp": "2025-09-30 22:15:12.041742", "step": 1658, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:12.080374", "step": 1658, "epoch": 1 }, { "type": "loss", "content": 0.01366068609058857, "timestamp": "2025-09-30 22:15:12.087910", "step": 1659, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:12.137396", "step": 1659, "epoch": 1 }, { "type": "loss", "content": 0.012483044527471066, "timestamp": "2025-09-30 22:15:12.171635", "step": 1660, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:12.206344", "step": 1660, "epoch": 1 }, { "type": "loss", "content": 0.02146543562412262, "timestamp": "2025-09-30 22:15:12.215016", "step": 1661, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:12.254632", "step": 1661, "epoch": 1 }, { "type": "loss", "content": 0.023363564163446426, "timestamp": "2025-09-30 22:15:12.262439", "step": 1662, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:12.306923", "step": 1662, "epoch": 1 }, { "type": "loss", "content": 0.024465836584568024, "timestamp": "2025-09-30 22:15:12.318997", "step": 1663, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:12.363484", "step": 1663, "epoch": 1 }, { "type": "loss", "content": 0.032095007598400116, "timestamp": "2025-09-30 22:15:12.410191", "step": 1664, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:12.471169", "step": 1664, "epoch": 1 }, { "type": "loss", "content": 0.016160208731889725, "timestamp": "2025-09-30 22:15:12.481747", "step": 1665, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:12.529492", "step": 1665, "epoch": 1 }, { "type": "loss", "content": 0.007151294033974409, "timestamp": "2025-09-30 22:15:12.542852", "step": 1666, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:12.599241", "step": 1666, "epoch": 1 }, { "type": "loss", "content": 0.0183419156819582, "timestamp": "2025-09-30 22:15:12.620858", "step": 1667, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:15:12.691914", "step": 1667, "epoch": 1 }, { "type": "loss", "content": 0.008691221475601196, "timestamp": "2025-09-30 22:15:12.728991", "step": 1668, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:12.775732", "step": 1668, "epoch": 1 }, { "type": "loss", "content": 0.013635504990816116, "timestamp": "2025-09-30 22:15:12.788431", "step": 1669, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:12.831372", "step": 1669, "epoch": 1 }, { "type": "loss", "content": 0.0186114851385355, "timestamp": "2025-09-30 22:15:12.843908", "step": 1670, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:12.894916", "step": 1670, "epoch": 1 }, { "type": "loss", "content": 0.008745511062443256, "timestamp": "2025-09-30 22:15:12.919761", "step": 1671, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:12.966732", "step": 1671, "epoch": 1 }, { "type": "loss", "content": 0.00956717412918806, "timestamp": "2025-09-30 22:15:13.001388", "step": 1672, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:13.040547", "step": 1672, "epoch": 1 }, { "type": "loss", "content": 0.018159685656428337, "timestamp": "2025-09-30 22:15:13.055612", "step": 1673, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:13.118496", "step": 1673, "epoch": 1 }, { "type": "loss", "content": 0.010431898757815361, "timestamp": "2025-09-30 22:15:13.134289", "step": 1674, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:13.181376", "step": 1674, "epoch": 1 }, { "type": "loss", "content": 0.01341898925602436, "timestamp": "2025-09-30 22:15:13.193997", "step": 1675, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:13.232693", "step": 1675, "epoch": 1 }, { "type": "loss", "content": 0.0198319423943758, "timestamp": "2025-09-30 22:15:13.275123", "step": 1676, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:13.325554", "step": 1676, "epoch": 1 }, { "type": "loss", "content": 0.01101736817508936, "timestamp": "2025-09-30 22:15:13.348949", "step": 1677, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:13.404663", "step": 1677, "epoch": 1 }, { "type": "loss", "content": 0.05434549227356911, "timestamp": "2025-09-30 22:15:13.416335", "step": 1678, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:13.461161", "step": 1678, "epoch": 1 }, { "type": "loss", "content": 0.016798565164208412, "timestamp": "2025-09-30 22:15:13.474310", "step": 1679, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:13.519923", "step": 1679, "epoch": 1 }, { "type": "loss", "content": 0.012180604040622711, "timestamp": "2025-09-30 22:15:13.550315", "step": 1680, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:13.634714", "step": 1680, "epoch": 1 }, { "type": "loss", "content": 0.013805767521262169, "timestamp": "2025-09-30 22:15:13.646294", "step": 1681, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:15:13.696300", "step": 1681, "epoch": 1 }, { "type": "loss", "content": 0.009852793999016285, "timestamp": "2025-09-30 22:15:13.719365", "step": 1682, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:15:13.770660", "step": 1682, "epoch": 1 }, { "type": "loss", "content": 0.01591717265546322, "timestamp": "2025-09-30 22:15:13.786861", "step": 1683, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:13.828935", "step": 1683, "epoch": 1 }, { "type": "loss", "content": 0.028100574389100075, "timestamp": "2025-09-30 22:15:13.863250", "step": 1684, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:13.903144", "step": 1684, "epoch": 1 }, { "type": "loss", "content": 0.013175170868635178, "timestamp": "2025-09-30 22:15:13.912845", "step": 1685, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:13.949682", "step": 1685, "epoch": 1 }, { "type": "loss", "content": 0.02460007183253765, "timestamp": "2025-09-30 22:15:13.963116", "step": 1686, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:14.002022", "step": 1686, "epoch": 1 }, { "type": "loss", "content": 0.025650952011346817, "timestamp": "2025-09-30 22:15:14.015339", "step": 1687, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:14.054111", "step": 1687, "epoch": 1 }, { "type": "loss", "content": 0.013419519178569317, "timestamp": "2025-09-30 22:15:14.088402", "step": 1688, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:14.125169", "step": 1688, "epoch": 1 }, { "type": "loss", "content": 0.01797398552298546, "timestamp": "2025-09-30 22:15:14.138333", "step": 1689, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:14.174713", "step": 1689, "epoch": 1 }, { "type": "loss", "content": 0.02820388600230217, "timestamp": "2025-09-30 22:15:14.185214", "step": 1690, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:14.233387", "step": 1690, "epoch": 1 }, { "type": "loss", "content": 0.01589238829910755, "timestamp": "2025-09-30 22:15:14.247079", "step": 1691, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:14.280376", "step": 1691, "epoch": 1 }, { "type": "loss", "content": 0.01470992062240839, "timestamp": "2025-09-30 22:15:14.313614", "step": 1692, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:14.351496", "step": 1692, "epoch": 1 }, { "type": "loss", "content": 0.010090518742799759, "timestamp": "2025-09-30 22:15:14.364584", "step": 1693, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:14.397685", "step": 1693, "epoch": 1 }, { "type": "loss", "content": 0.028140120208263397, "timestamp": "2025-09-30 22:15:14.410233", "step": 1694, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:14.445849", "step": 1694, "epoch": 1 }, { "type": "loss", "content": 0.036818306893110275, "timestamp": "2025-09-30 22:15:14.453786", "step": 1695, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:14.492868", "step": 1695, "epoch": 1 }, { "type": "loss", "content": 0.0343332402408123, "timestamp": "2025-09-30 22:15:14.527084", "step": 1696, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:15:14.568832", "step": 1696, "epoch": 1 }, { "type": "loss", "content": 0.03022080473601818, "timestamp": "2025-09-30 22:15:14.584033", "step": 1697, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:14.616168", "step": 1697, "epoch": 1 }, { "type": "loss", "content": 0.028232431039214134, "timestamp": "2025-09-30 22:15:14.628516", "step": 1698, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:14.670814", "step": 1698, "epoch": 1 }, { "type": "loss", "content": 0.027424275875091553, "timestamp": "2025-09-30 22:15:14.683392", "step": 1699, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:14.721004", "step": 1699, "epoch": 1 }, { "type": "loss", "content": 0.05194878950715065, "timestamp": "2025-09-30 22:15:14.754163", "step": 1700, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:14.790900", "step": 1700, "epoch": 1 }, { "type": "loss", "content": 0.023143520578742027, "timestamp": "2025-09-30 22:15:14.799620", "step": 1701, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:14.835941", "step": 1701, "epoch": 1 }, { "type": "loss", "content": 0.021254858002066612, "timestamp": "2025-09-30 22:15:14.847208", "step": 1702, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:14.884841", "step": 1702, "epoch": 1 }, { "type": "loss", "content": 0.0150804677978158, "timestamp": "2025-09-30 22:15:14.895272", "step": 1703, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:14.931348", "step": 1703, "epoch": 1 }, { "type": "loss", "content": 0.0160662978887558, "timestamp": "2025-09-30 22:15:14.962603", "step": 1704, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:14.996595", "step": 1704, "epoch": 1 }, { "type": "loss", "content": 0.018683334812521935, "timestamp": "2025-09-30 22:15:15.005509", "step": 1705, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:15.044911", "step": 1705, "epoch": 1 }, { "type": "loss", "content": 0.01358802616596222, "timestamp": "2025-09-30 22:15:15.056036", "step": 1706, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:15.089359", "step": 1706, "epoch": 1 }, { "type": "loss", "content": 0.018810700625181198, "timestamp": "2025-09-30 22:15:15.096862", "step": 1707, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:15.144559", "step": 1707, "epoch": 1 }, { "type": "loss", "content": 0.026481501758098602, "timestamp": "2025-09-30 22:15:15.179146", "step": 1708, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:15.215178", "step": 1708, "epoch": 1 }, { "type": "loss", "content": 0.02003110758960247, "timestamp": "2025-09-30 22:15:15.228157", "step": 1709, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:15.265597", "step": 1709, "epoch": 1 }, { "type": "loss", "content": 0.016395648941397667, "timestamp": "2025-09-30 22:15:15.278139", "step": 1710, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:15.309718", "step": 1710, "epoch": 1 }, { "type": "loss", "content": 0.020903056487441063, "timestamp": "2025-09-30 22:15:15.320190", "step": 1711, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:15.352436", "step": 1711, "epoch": 1 }, { "type": "loss", "content": 0.02075912617146969, "timestamp": "2025-09-30 22:15:15.384441", "step": 1712, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:15.419582", "step": 1712, "epoch": 1 }, { "type": "loss", "content": 0.02496561035513878, "timestamp": "2025-09-30 22:15:15.435869", "step": 1713, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:15.482040", "step": 1713, "epoch": 1 }, { "type": "loss", "content": 0.025438209995627403, "timestamp": "2025-09-30 22:15:15.495744", "step": 1714, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:15:15.539585", "step": 1714, "epoch": 1 }, { "type": "loss", "content": 0.02345852367579937, "timestamp": "2025-09-30 22:15:15.555923", "step": 1715, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:15.592113", "step": 1715, "epoch": 1 }, { "type": "loss", "content": 0.017462914809584618, "timestamp": "2025-09-30 22:15:15.624105", "step": 1716, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:15.660417", "step": 1716, "epoch": 1 }, { "type": "loss", "content": 0.017815817147493362, "timestamp": "2025-09-30 22:15:15.670255", "step": 1717, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:15.704918", "step": 1717, "epoch": 1 }, { "type": "loss", "content": 0.010522090829908848, "timestamp": "2025-09-30 22:15:15.717541", "step": 1718, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:15.750633", "step": 1718, "epoch": 1 }, { "type": "loss", "content": 0.020719345659017563, "timestamp": "2025-09-30 22:15:15.760908", "step": 1719, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:15.795165", "step": 1719, "epoch": 1 }, { "type": "loss", "content": 0.03029588609933853, "timestamp": "2025-09-30 22:15:15.828569", "step": 1720, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:15.866673", "step": 1720, "epoch": 1 }, { "type": "loss", "content": 0.03135362267494202, "timestamp": "2025-09-30 22:15:15.875475", "step": 1721, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:15.912720", "step": 1721, "epoch": 1 }, { "type": "loss", "content": 0.021656924858689308, "timestamp": "2025-09-30 22:15:15.923760", "step": 1722, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:15.964457", "step": 1722, "epoch": 1 }, { "type": "loss", "content": 0.01336958073079586, "timestamp": "2025-09-30 22:15:15.977016", "step": 1723, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:16.009623", "step": 1723, "epoch": 1 }, { "type": "loss", "content": 0.04008239880204201, "timestamp": "2025-09-30 22:15:16.042772", "step": 1724, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:16.075780", "step": 1724, "epoch": 1 }, { "type": "loss", "content": 0.018738439306616783, "timestamp": "2025-09-30 22:15:16.081369", "step": 1725, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:15:18.522572", "step": 1725, "epoch": 1 }, { "type": "pplx", "content": 5.524427428699606, "timestamp": "2025-09-30 22:15:18.525301", "step": 1725, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:18.563053", "step": 1725, "epoch": 1 }, { "type": "loss", "content": 0.009471976198256016, "timestamp": "2025-09-30 22:15:18.576396", "step": 1726, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:18.609903", "step": 1726, "epoch": 1 }, { "type": "loss", "content": 0.034076958894729614, "timestamp": "2025-09-30 22:15:18.617656", "step": 1727, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:18.661195", "step": 1727, "epoch": 1 }, { "type": "loss", "content": 0.006048847455531359, "timestamp": "2025-09-30 22:15:18.695827", "step": 1728, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:18.730077", "step": 1728, "epoch": 1 }, { "type": "loss", "content": 0.022094430401921272, "timestamp": "2025-09-30 22:15:18.738016", "step": 1729, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:18.777458", "step": 1729, "epoch": 1 }, { "type": "loss", "content": 0.022269519045948982, "timestamp": "2025-09-30 22:15:18.784378", "step": 1730, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:18.821527", "step": 1730, "epoch": 1 }, { "type": "loss", "content": 0.030709749087691307, "timestamp": "2025-09-30 22:15:18.832475", "step": 1731, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:18.867738", "step": 1731, "epoch": 1 }, { "type": "loss", "content": 0.026105714961886406, "timestamp": "2025-09-30 22:15:18.899092", "step": 1732, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:18.942742", "step": 1732, "epoch": 1 }, { "type": "loss", "content": 0.023841775953769684, "timestamp": "2025-09-30 22:15:18.948539", "step": 1733, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:18.985012", "step": 1733, "epoch": 1 }, { "type": "loss", "content": 0.018309002742171288, "timestamp": "2025-09-30 22:15:18.997177", "step": 1734, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:19.039996", "step": 1734, "epoch": 1 }, { "type": "loss", "content": 0.014661869965493679, "timestamp": "2025-09-30 22:15:19.052312", "step": 1735, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:19.098608", "step": 1735, "epoch": 1 }, { "type": "loss", "content": 0.029143791645765305, "timestamp": "2025-09-30 22:15:19.133206", "step": 1736, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:19.177766", "step": 1736, "epoch": 1 }, { "type": "loss", "content": 0.03220280632376671, "timestamp": "2025-09-30 22:15:19.190883", "step": 1737, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:19.224088", "step": 1737, "epoch": 1 }, { "type": "loss", "content": 0.010954529978334904, "timestamp": "2025-09-30 22:15:19.233474", "step": 1738, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:15:19.272941", "step": 1738, "epoch": 1 }, { "type": "loss", "content": 0.025374675169587135, "timestamp": "2025-09-30 22:15:19.277166", "step": 1739, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:15:19.314726", "step": 1739, "epoch": 1 }, { "type": "loss", "content": 0.010610057041049004, "timestamp": "2025-09-30 22:15:19.339900", "step": 1740, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:19.383524", "step": 1740, "epoch": 1 }, { "type": "loss", "content": 0.01448817364871502, "timestamp": "2025-09-30 22:15:19.393469", "step": 1741, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:19.428391", "step": 1741, "epoch": 1 }, { "type": "loss", "content": 0.010096672922372818, "timestamp": "2025-09-30 22:15:19.438880", "step": 1742, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:19.473712", "step": 1742, "epoch": 1 }, { "type": "loss", "content": 0.004360891878604889, "timestamp": "2025-09-30 22:15:19.484050", "step": 1743, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:19.527998", "step": 1743, "epoch": 1 }, { "type": "loss", "content": 0.021376613527536392, "timestamp": "2025-09-30 22:15:19.560949", "step": 1744, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:19.600419", "step": 1744, "epoch": 1 }, { "type": "loss", "content": 0.010968276299536228, "timestamp": "2025-09-30 22:15:19.610294", "step": 1745, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:19.646393", "step": 1745, "epoch": 1 }, { "type": "loss", "content": 0.016172928735613823, "timestamp": "2025-09-30 22:15:19.655510", "step": 1746, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:19.698685", "step": 1746, "epoch": 1 }, { "type": "loss", "content": 0.008436311967670918, "timestamp": "2025-09-30 22:15:19.711304", "step": 1747, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:19.744843", "step": 1747, "epoch": 1 }, { "type": "loss", "content": 0.01623164303600788, "timestamp": "2025-09-30 22:15:19.778335", "step": 1748, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:19.814602", "step": 1748, "epoch": 1 }, { "type": "loss", "content": 0.033938564360141754, "timestamp": "2025-09-30 22:15:19.823264", "step": 1749, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:19.867354", "step": 1749, "epoch": 1 }, { "type": "loss", "content": 0.01624465547502041, "timestamp": "2025-09-30 22:15:19.880668", "step": 1750, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:19.916953", "step": 1750, "epoch": 1 }, { "type": "loss", "content": 0.0248996801674366, "timestamp": "2025-09-30 22:15:19.924824", "step": 1751, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:19.956200", "step": 1751, "epoch": 1 }, { "type": "loss", "content": 0.018772227689623833, "timestamp": "2025-09-30 22:15:19.987554", "step": 1752, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:20.020058", "step": 1752, "epoch": 1 }, { "type": "loss", "content": 0.01700027659535408, "timestamp": "2025-09-30 22:15:20.025435", "step": 1753, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:20.058097", "step": 1753, "epoch": 1 }, { "type": "loss", "content": 0.024260636419057846, "timestamp": "2025-09-30 22:15:20.066066", "step": 1754, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:20.101018", "step": 1754, "epoch": 1 }, { "type": "loss", "content": 0.014491290785372257, "timestamp": "2025-09-30 22:15:20.114365", "step": 1755, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:20.147112", "step": 1755, "epoch": 1 }, { "type": "loss", "content": 0.017340457066893578, "timestamp": "2025-09-30 22:15:20.179126", "step": 1756, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:20.217277", "step": 1756, "epoch": 1 }, { "type": "loss", "content": 0.03053688071668148, "timestamp": "2025-09-30 22:15:20.222894", "step": 1757, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:20.261376", "step": 1757, "epoch": 1 }, { "type": "loss", "content": 0.011630667373538017, "timestamp": "2025-09-30 22:15:20.274697", "step": 1758, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-30 22:15:20.323682", "step": 1758, "epoch": 1 }, { "type": "loss", "content": 0.010422976687550545, "timestamp": "2025-09-30 22:15:20.341262", "step": 1759, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:20.381298", "step": 1759, "epoch": 1 }, { "type": "loss", "content": 0.00819334015250206, "timestamp": "2025-09-30 22:15:20.415832", "step": 1760, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:15:20.453585", "step": 1760, "epoch": 1 }, { "type": "loss", "content": 0.012914934195578098, "timestamp": "2025-09-30 22:15:20.468659", "step": 1761, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:20.502785", "step": 1761, "epoch": 1 }, { "type": "loss", "content": 0.02971467934548855, "timestamp": "2025-09-30 22:15:20.515354", "step": 1762, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:20.551770", "step": 1762, "epoch": 1 }, { "type": "loss", "content": 0.00758015364408493, "timestamp": "2025-09-30 22:15:20.564296", "step": 1763, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:20.603621", "step": 1763, "epoch": 1 }, { "type": "loss", "content": 0.010941659100353718, "timestamp": "2025-09-30 22:15:20.638224", "step": 1764, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:20.681274", "step": 1764, "epoch": 1 }, { "type": "loss", "content": 0.014606650918722153, "timestamp": "2025-09-30 22:15:20.691790", "step": 1765, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:20.726870", "step": 1765, "epoch": 1 }, { "type": "loss", "content": 0.018656129017472267, "timestamp": "2025-09-30 22:15:20.739287", "step": 1766, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:20.778474", "step": 1766, "epoch": 1 }, { "type": "loss", "content": 0.01316360104829073, "timestamp": "2025-09-30 22:15:20.792174", "step": 1767, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:20.830398", "step": 1767, "epoch": 1 }, { "type": "loss", "content": 0.008767510764300823, "timestamp": "2025-09-30 22:15:20.861530", "step": 1768, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:20.896665", "step": 1768, "epoch": 1 }, { "type": "loss", "content": 0.022422747686505318, "timestamp": "2025-09-30 22:15:20.906481", "step": 1769, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:20.960049", "step": 1769, "epoch": 1 }, { "type": "loss", "content": 0.013530876487493515, "timestamp": "2025-09-30 22:15:20.973856", "step": 1770, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:21.009694", "step": 1770, "epoch": 1 }, { "type": "loss", "content": 0.016074175015091896, "timestamp": "2025-09-30 22:15:21.022218", "step": 1771, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:21.059845", "step": 1771, "epoch": 1 }, { "type": "loss", "content": 0.012821718119084835, "timestamp": "2025-09-30 22:15:21.091305", "step": 1772, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:21.126100", "step": 1772, "epoch": 1 }, { "type": "loss", "content": 0.01885077729821205, "timestamp": "2025-09-30 22:15:21.134767", "step": 1773, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:21.172538", "step": 1773, "epoch": 1 }, { "type": "loss", "content": 0.01258346438407898, "timestamp": "2025-09-30 22:15:21.183845", "step": 1774, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:21.216032", "step": 1774, "epoch": 1 }, { "type": "loss", "content": 0.02077512815594673, "timestamp": "2025-09-30 22:15:21.223993", "step": 1775, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:21.263595", "step": 1775, "epoch": 1 }, { "type": "loss", "content": 0.004540475085377693, "timestamp": "2025-09-30 22:15:21.298157", "step": 1776, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:21.339064", "step": 1776, "epoch": 1 }, { "type": "loss", "content": 0.006810082122683525, "timestamp": "2025-09-30 22:15:21.354483", "step": 1777, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:21.401475", "step": 1777, "epoch": 1 }, { "type": "loss", "content": 0.006215658038854599, "timestamp": "2025-09-30 22:15:21.415147", "step": 1778, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:21.461281", "step": 1778, "epoch": 1 }, { "type": "loss", "content": 0.005509376525878906, "timestamp": "2025-09-30 22:15:21.475121", "step": 1779, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:21.510225", "step": 1779, "epoch": 1 }, { "type": "loss", "content": 0.010733134113252163, "timestamp": "2025-09-30 22:15:21.543598", "step": 1780, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:21.582176", "step": 1780, "epoch": 1 }, { "type": "loss", "content": 0.01937573403120041, "timestamp": "2025-09-30 22:15:21.592264", "step": 1781, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:21.641408", "step": 1781, "epoch": 1 }, { "type": "loss", "content": 0.007524130865931511, "timestamp": "2025-09-30 22:15:21.653969", "step": 1782, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:21.699905", "step": 1782, "epoch": 1 }, { "type": "loss", "content": 0.0070258439518511295, "timestamp": "2025-09-30 22:15:21.715826", "step": 1783, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:21.750804", "step": 1783, "epoch": 1 }, { "type": "loss", "content": 0.0137464739382267, "timestamp": "2025-09-30 22:15:21.779359", "step": 1784, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:15:21.823451", "step": 1784, "epoch": 1 }, { "type": "loss", "content": 0.017076196148991585, "timestamp": "2025-09-30 22:15:21.839145", "step": 1785, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:21.879757", "step": 1785, "epoch": 1 }, { "type": "loss", "content": 0.03697328642010689, "timestamp": "2025-09-30 22:15:21.891180", "step": 1786, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:21.927460", "step": 1786, "epoch": 1 }, { "type": "loss", "content": 0.014486968517303467, "timestamp": "2025-09-30 22:15:21.938851", "step": 1787, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:21.978780", "step": 1787, "epoch": 1 }, { "type": "loss", "content": 0.008746178820729256, "timestamp": "2025-09-30 22:15:22.010210", "step": 1788, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:15:22.066507", "step": 1788, "epoch": 1 }, { "type": "loss", "content": 0.008364402689039707, "timestamp": "2025-09-30 22:15:22.082207", "step": 1789, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:22.134773", "step": 1789, "epoch": 1 }, { "type": "loss", "content": 0.013731135986745358, "timestamp": "2025-09-30 22:15:22.148609", "step": 1790, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:22.196051", "step": 1790, "epoch": 1 }, { "type": "loss", "content": 0.006996935233473778, "timestamp": "2025-09-30 22:15:22.209963", "step": 1791, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:15:22.264298", "step": 1791, "epoch": 1 }, { "type": "loss", "content": 0.008566569536924362, "timestamp": "2025-09-30 22:15:22.302446", "step": 1792, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:22.340129", "step": 1792, "epoch": 1 }, { "type": "loss", "content": 0.014082583598792553, "timestamp": "2025-09-30 22:15:22.348979", "step": 1793, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:22.390495", "step": 1793, "epoch": 1 }, { "type": "loss", "content": 0.02005489356815815, "timestamp": "2025-09-30 22:15:22.402838", "step": 1794, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:22.447617", "step": 1794, "epoch": 1 }, { "type": "loss", "content": 0.010589111596345901, "timestamp": "2025-09-30 22:15:22.461330", "step": 1795, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:22.502133", "step": 1795, "epoch": 1 }, { "type": "loss", "content": 0.004992394242435694, "timestamp": "2025-09-30 22:15:22.537039", "step": 1796, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:22.571758", "step": 1796, "epoch": 1 }, { "type": "loss", "content": 0.025007154792547226, "timestamp": "2025-09-30 22:15:22.580431", "step": 1797, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:22.621276", "step": 1797, "epoch": 1 }, { "type": "loss", "content": 0.00899725966155529, "timestamp": "2025-09-30 22:15:22.634672", "step": 1798, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:22.688607", "step": 1798, "epoch": 1 }, { "type": "loss", "content": 0.013840748928487301, "timestamp": "2025-09-30 22:15:22.702278", "step": 1799, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:22.739073", "step": 1799, "epoch": 1 }, { "type": "loss", "content": 0.022443510591983795, "timestamp": "2025-09-30 22:15:22.772309", "step": 1800, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:22.807669", "step": 1800, "epoch": 1 }, { "type": "loss", "content": 0.011950202286243439, "timestamp": "2025-09-30 22:15:22.817784", "step": 1801, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:22.853827", "step": 1801, "epoch": 1 }, { "type": "loss", "content": 0.01878458447754383, "timestamp": "2025-09-30 22:15:22.864912", "step": 1802, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:22.907941", "step": 1802, "epoch": 1 }, { "type": "loss", "content": 0.02448650635778904, "timestamp": "2025-09-30 22:15:22.918359", "step": 1803, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:22.950555", "step": 1803, "epoch": 1 }, { "type": "loss", "content": 0.019893376156687737, "timestamp": "2025-09-30 22:15:22.982407", "step": 1804, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:23.014406", "step": 1804, "epoch": 1 }, { "type": "loss", "content": 0.010442492552101612, "timestamp": "2025-09-30 22:15:23.020014", "step": 1805, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:23.054053", "step": 1805, "epoch": 1 }, { "type": "loss", "content": 0.011205971240997314, "timestamp": "2025-09-30 22:15:23.061030", "step": 1806, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:23.097700", "step": 1806, "epoch": 1 }, { "type": "loss", "content": 0.007506354711949825, "timestamp": "2025-09-30 22:15:23.111084", "step": 1807, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:23.149451", "step": 1807, "epoch": 1 }, { "type": "loss", "content": 0.027089744806289673, "timestamp": "2025-09-30 22:15:23.182835", "step": 1808, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:23.216940", "step": 1808, "epoch": 1 }, { "type": "loss", "content": 0.017995381727814674, "timestamp": "2025-09-30 22:15:23.227348", "step": 1809, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:23.260626", "step": 1809, "epoch": 1 }, { "type": "loss", "content": 0.02709285542368889, "timestamp": "2025-09-30 22:15:23.273232", "step": 1810, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:23.312977", "step": 1810, "epoch": 1 }, { "type": "loss", "content": 0.03118719719350338, "timestamp": "2025-09-30 22:15:23.326368", "step": 1811, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:23.360594", "step": 1811, "epoch": 1 }, { "type": "loss", "content": 0.026631765067577362, "timestamp": "2025-09-30 22:15:23.393810", "step": 1812, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:23.430423", "step": 1812, "epoch": 1 }, { "type": "loss", "content": 0.009016057476401329, "timestamp": "2025-09-30 22:15:23.443727", "step": 1813, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:23.480525", "step": 1813, "epoch": 1 }, { "type": "loss", "content": 0.009003258310258389, "timestamp": "2025-09-30 22:15:23.492827", "step": 1814, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 592 ], "flops": 17560600598464 }, "timestamp": "2025-09-30 22:15:23.544774", "step": 1814, "epoch": 1 }, { "type": "loss", "content": 0.005577434320002794, "timestamp": "2025-09-30 22:15:23.565852", "step": 1815, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:23.603139", "step": 1815, "epoch": 1 }, { "type": "loss", "content": 0.013796670362353325, "timestamp": "2025-09-30 22:15:23.637356", "step": 1816, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:23.677517", "step": 1816, "epoch": 1 }, { "type": "loss", "content": 0.010871457867324352, "timestamp": "2025-09-30 22:15:23.683954", "step": 1817, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:23.717131", "step": 1817, "epoch": 1 }, { "type": "loss", "content": 0.006785405334085226, "timestamp": "2025-09-30 22:15:23.729503", "step": 1818, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:23.770701", "step": 1818, "epoch": 1 }, { "type": "loss", "content": 0.008428267203271389, "timestamp": "2025-09-30 22:15:23.784159", "step": 1819, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:23.817435", "step": 1819, "epoch": 1 }, { "type": "loss", "content": 0.009874596260488033, "timestamp": "2025-09-30 22:15:23.850883", "step": 1820, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:23.885048", "step": 1820, "epoch": 1 }, { "type": "loss", "content": 0.02326430380344391, "timestamp": "2025-09-30 22:15:23.897644", "step": 1821, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:23.930177", "step": 1821, "epoch": 1 }, { "type": "loss", "content": 0.03274618089199066, "timestamp": "2025-09-30 22:15:23.941399", "step": 1822, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:23.977828", "step": 1822, "epoch": 1 }, { "type": "loss", "content": 0.023377005010843277, "timestamp": "2025-09-30 22:15:23.990183", "step": 1823, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:24.027694", "step": 1823, "epoch": 1 }, { "type": "loss", "content": 0.02934858575463295, "timestamp": "2025-09-30 22:15:24.062433", "step": 1824, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:24.102236", "step": 1824, "epoch": 1 }, { "type": "loss", "content": 0.0181177519261837, "timestamp": "2025-09-30 22:15:24.114869", "step": 1825, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:24.152510", "step": 1825, "epoch": 1 }, { "type": "loss", "content": 0.01756267435848713, "timestamp": "2025-09-30 22:15:24.165081", "step": 1826, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:24.202823", "step": 1826, "epoch": 1 }, { "type": "loss", "content": 0.03452328220009804, "timestamp": "2025-09-30 22:15:24.216543", "step": 1827, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:24.251387", "step": 1827, "epoch": 1 }, { "type": "loss", "content": 0.024465546011924744, "timestamp": "2025-09-30 22:15:24.284646", "step": 1828, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:24.317717", "step": 1828, "epoch": 1 }, { "type": "loss", "content": 0.038967929780483246, "timestamp": "2025-09-30 22:15:24.327653", "step": 1829, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:24.362319", "step": 1829, "epoch": 1 }, { "type": "loss", "content": 0.026917487382888794, "timestamp": "2025-09-30 22:15:24.373471", "step": 1830, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:24.406705", "step": 1830, "epoch": 1 }, { "type": "loss", "content": 0.025900596752762794, "timestamp": "2025-09-30 22:15:24.417218", "step": 1831, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:24.459304", "step": 1831, "epoch": 1 }, { "type": "loss", "content": 0.00981822144240141, "timestamp": "2025-09-30 22:15:24.494096", "step": 1832, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:24.537747", "step": 1832, "epoch": 1 }, { "type": "loss", "content": 0.023126764222979546, "timestamp": "2025-09-30 22:15:24.550816", "step": 1833, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:15:24.595360", "step": 1833, "epoch": 1 }, { "type": "loss", "content": 0.013973300345242023, "timestamp": "2025-09-30 22:15:24.613093", "step": 1834, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:24.657748", "step": 1834, "epoch": 1 }, { "type": "loss", "content": 0.017417822033166885, "timestamp": "2025-09-30 22:15:24.673657", "step": 1835, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:24.708575", "step": 1835, "epoch": 1 }, { "type": "loss", "content": 0.004481332842260599, "timestamp": "2025-09-30 22:15:24.742836", "step": 1836, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:24.777196", "step": 1836, "epoch": 1 }, { "type": "loss", "content": 0.009193511679768562, "timestamp": "2025-09-30 22:15:24.787173", "step": 1837, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:24.823652", "step": 1837, "epoch": 1 }, { "type": "loss", "content": 0.012367206625640392, "timestamp": "2025-09-30 22:15:24.837056", "step": 1838, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:24.881330", "step": 1838, "epoch": 1 }, { "type": "loss", "content": 0.012220718897879124, "timestamp": "2025-09-30 22:15:24.894705", "step": 1839, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:24.936285", "step": 1839, "epoch": 1 }, { "type": "loss", "content": 0.012313958257436752, "timestamp": "2025-09-30 22:15:24.970866", "step": 1840, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:15:27.455257", "step": 1840, "epoch": 1 }, { "type": "pplx", "content": 5.489145934259954, "timestamp": "2025-09-30 22:15:27.457920", "step": 1840, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:27.488465", "step": 1840, "epoch": 1 }, { "type": "loss", "content": 0.004935309290885925, "timestamp": "2025-09-30 22:15:27.501114", "step": 1841, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:27.538849", "step": 1841, "epoch": 1 }, { "type": "loss", "content": 0.015119443647563457, "timestamp": "2025-09-30 22:15:27.551428", "step": 1842, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:27.594161", "step": 1842, "epoch": 1 }, { "type": "loss", "content": 0.011127560399472713, "timestamp": "2025-09-30 22:15:27.607974", "step": 1843, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:27.646596", "step": 1843, "epoch": 1 }, { "type": "loss", "content": 0.017564384266734123, "timestamp": "2025-09-30 22:15:27.674867", "step": 1844, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:27.709911", "step": 1844, "epoch": 2 }, { "type": "loss", "content": 0.03666895255446434, "timestamp": "2025-09-30 22:15:27.715216", "step": 1845, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:27.754659", "step": 1845, "epoch": 2 }, { "type": "loss", "content": 0.008158748969435692, "timestamp": "2025-09-30 22:15:27.768020", "step": 1846, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:27.801840", "step": 1846, "epoch": 2 }, { "type": "loss", "content": 0.013101806864142418, "timestamp": "2025-09-30 22:15:27.809376", "step": 1847, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:27.861537", "step": 1847, "epoch": 2 }, { "type": "loss", "content": 0.013875213451683521, "timestamp": "2025-09-30 22:15:27.893791", "step": 1848, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:27.934819", "step": 1848, "epoch": 2 }, { "type": "loss", "content": 0.010416905395686626, "timestamp": "2025-09-30 22:15:27.944907", "step": 1849, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:27.981397", "step": 1849, "epoch": 2 }, { "type": "loss", "content": 0.011625699698925018, "timestamp": "2025-09-30 22:15:27.988937", "step": 1850, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:28.027331", "step": 1850, "epoch": 2 }, { "type": "loss", "content": 0.015321994200348854, "timestamp": "2025-09-30 22:15:28.040676", "step": 1851, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:28.074429", "step": 1851, "epoch": 2 }, { "type": "loss", "content": 0.010722902603447437, "timestamp": "2025-09-30 22:15:28.107823", "step": 1852, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:15:28.151752", "step": 1852, "epoch": 2 }, { "type": "loss", "content": 0.010753704234957695, "timestamp": "2025-09-30 22:15:28.168448", "step": 1853, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:28.204256", "step": 1853, "epoch": 2 }, { "type": "loss", "content": 0.009430734440684319, "timestamp": "2025-09-30 22:15:28.215559", "step": 1854, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:28.256649", "step": 1854, "epoch": 2 }, { "type": "loss", "content": 0.006714122835546732, "timestamp": "2025-09-30 22:15:28.270147", "step": 1855, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:28.308013", "step": 1855, "epoch": 2 }, { "type": "loss", "content": 0.012612263672053814, "timestamp": "2025-09-30 22:15:28.342248", "step": 1856, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:28.389282", "step": 1856, "epoch": 2 }, { "type": "loss", "content": 0.012894387356936932, "timestamp": "2025-09-30 22:15:28.396669", "step": 1857, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:28.437005", "step": 1857, "epoch": 2 }, { "type": "loss", "content": 0.012102191336452961, "timestamp": "2025-09-30 22:15:28.445003", "step": 1858, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:28.483923", "step": 1858, "epoch": 2 }, { "type": "loss", "content": 0.013301697559654713, "timestamp": "2025-09-30 22:15:28.495288", "step": 1859, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:28.532470", "step": 1859, "epoch": 2 }, { "type": "loss", "content": 0.013101636432111263, "timestamp": "2025-09-30 22:15:28.566735", "step": 1860, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:28.602883", "step": 1860, "epoch": 2 }, { "type": "loss", "content": 0.018009858205914497, "timestamp": "2025-09-30 22:15:28.611014", "step": 1861, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:28.653408", "step": 1861, "epoch": 2 }, { "type": "loss", "content": 0.017011038959026337, "timestamp": "2025-09-30 22:15:28.665869", "step": 1862, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:28.712007", "step": 1862, "epoch": 2 }, { "type": "loss", "content": 0.016825055703520775, "timestamp": "2025-09-30 22:15:28.719248", "step": 1863, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:28.755095", "step": 1863, "epoch": 2 }, { "type": "loss", "content": 0.022498799487948418, "timestamp": "2025-09-30 22:15:28.783852", "step": 1864, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:28.823770", "step": 1864, "epoch": 2 }, { "type": "loss", "content": 0.015230205841362476, "timestamp": "2025-09-30 22:15:28.832932", "step": 1865, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:28.894586", "step": 1865, "epoch": 2 }, { "type": "loss", "content": 0.006908940616995096, "timestamp": "2025-09-30 22:15:28.902454", "step": 1866, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:28.934193", "step": 1866, "epoch": 2 }, { "type": "loss", "content": 0.013190867379307747, "timestamp": "2025-09-30 22:15:28.941223", "step": 1867, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:28.976258", "step": 1867, "epoch": 2 }, { "type": "loss", "content": 0.01497811358422041, "timestamp": "2025-09-30 22:15:29.004232", "step": 1868, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:29.046964", "step": 1868, "epoch": 2 }, { "type": "loss", "content": 0.00782166887074709, "timestamp": "2025-09-30 22:15:29.060054", "step": 1869, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:29.100544", "step": 1869, "epoch": 2 }, { "type": "loss", "content": 0.023333149030804634, "timestamp": "2025-09-30 22:15:29.108894", "step": 1870, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:29.144079", "step": 1870, "epoch": 2 }, { "type": "loss", "content": 0.011829240247607231, "timestamp": "2025-09-30 22:15:29.157857", "step": 1871, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:29.192571", "step": 1871, "epoch": 2 }, { "type": "loss", "content": 0.017776671797037125, "timestamp": "2025-09-30 22:15:29.226031", "step": 1872, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:29.261788", "step": 1872, "epoch": 2 }, { "type": "loss", "content": 0.007805516477674246, "timestamp": "2025-09-30 22:15:29.272561", "step": 1873, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:29.322182", "step": 1873, "epoch": 2 }, { "type": "loss", "content": 0.009270432405173779, "timestamp": "2025-09-30 22:15:29.335524", "step": 1874, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:29.373774", "step": 1874, "epoch": 2 }, { "type": "loss", "content": 0.01128674391657114, "timestamp": "2025-09-30 22:15:29.384947", "step": 1875, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:29.420416", "step": 1875, "epoch": 2 }, { "type": "loss", "content": 0.010258953087031841, "timestamp": "2025-09-30 22:15:29.449146", "step": 1876, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:29.488855", "step": 1876, "epoch": 2 }, { "type": "loss", "content": 0.008005455136299133, "timestamp": "2025-09-30 22:15:29.501837", "step": 1877, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:29.547812", "step": 1877, "epoch": 2 }, { "type": "loss", "content": 0.016554296016693115, "timestamp": "2025-09-30 22:15:29.558946", "step": 1878, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:29.598181", "step": 1878, "epoch": 2 }, { "type": "loss", "content": 0.015631001442670822, "timestamp": "2025-09-30 22:15:29.611924", "step": 1879, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:29.647928", "step": 1879, "epoch": 2 }, { "type": "loss", "content": 0.015927642583847046, "timestamp": "2025-09-30 22:15:29.682503", "step": 1880, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:29.719745", "step": 1880, "epoch": 2 }, { "type": "loss", "content": 0.005917059723287821, "timestamp": "2025-09-30 22:15:29.725380", "step": 1881, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:29.757829", "step": 1881, "epoch": 2 }, { "type": "loss", "content": 0.01006174273788929, "timestamp": "2025-09-30 22:15:29.768971", "step": 1882, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:29.806877", "step": 1882, "epoch": 2 }, { "type": "loss", "content": 0.02184908092021942, "timestamp": "2025-09-30 22:15:29.814142", "step": 1883, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:29.848059", "step": 1883, "epoch": 2 }, { "type": "loss", "content": 0.014168507419526577, "timestamp": "2025-09-30 22:15:29.875806", "step": 1884, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:29.909335", "step": 1884, "epoch": 2 }, { "type": "loss", "content": 0.01987319439649582, "timestamp": "2025-09-30 22:15:29.915014", "step": 1885, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:29.948518", "step": 1885, "epoch": 2 }, { "type": "loss", "content": 0.011883490718901157, "timestamp": "2025-09-30 22:15:29.958854", "step": 1886, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:15:30.007776", "step": 1886, "epoch": 2 }, { "type": "loss", "content": 0.010094949044287205, "timestamp": "2025-09-30 22:15:30.025490", "step": 1887, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:30.063403", "step": 1887, "epoch": 2 }, { "type": "loss", "content": 0.009513383731245995, "timestamp": "2025-09-30 22:15:30.097906", "step": 1888, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:30.136806", "step": 1888, "epoch": 2 }, { "type": "loss", "content": 0.008315491490066051, "timestamp": "2025-09-30 22:15:30.152260", "step": 1889, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:30.184560", "step": 1889, "epoch": 2 }, { "type": "loss", "content": 0.014158587902784348, "timestamp": "2025-09-30 22:15:30.196948", "step": 1890, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:30.229527", "step": 1890, "epoch": 2 }, { "type": "loss", "content": 0.010483119636774063, "timestamp": "2025-09-30 22:15:30.237505", "step": 1891, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:30.275983", "step": 1891, "epoch": 2 }, { "type": "loss", "content": 0.01435977965593338, "timestamp": "2025-09-30 22:15:30.308088", "step": 1892, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:30.342026", "step": 1892, "epoch": 2 }, { "type": "loss", "content": 0.014692501164972782, "timestamp": "2025-09-30 22:15:30.347595", "step": 1893, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:30.388102", "step": 1893, "epoch": 2 }, { "type": "loss", "content": 0.020229607820510864, "timestamp": "2025-09-30 22:15:30.399129", "step": 1894, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:30.438020", "step": 1894, "epoch": 2 }, { "type": "loss", "content": 0.011172110214829445, "timestamp": "2025-09-30 22:15:30.445555", "step": 1895, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:30.485998", "step": 1895, "epoch": 2 }, { "type": "loss", "content": 0.009735530242323875, "timestamp": "2025-09-30 22:15:30.520145", "step": 1896, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:30.555929", "step": 1896, "epoch": 2 }, { "type": "loss", "content": 0.008219888433814049, "timestamp": "2025-09-30 22:15:30.569007", "step": 1897, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:30.604067", "step": 1897, "epoch": 2 }, { "type": "loss", "content": 0.010688601061701775, "timestamp": "2025-09-30 22:15:30.615381", "step": 1898, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:30.647848", "step": 1898, "epoch": 2 }, { "type": "loss", "content": 0.01074580755084753, "timestamp": "2025-09-30 22:15:30.658938", "step": 1899, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:30.700789", "step": 1899, "epoch": 2 }, { "type": "loss", "content": 0.013557717204093933, "timestamp": "2025-09-30 22:15:30.733906", "step": 1900, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:30.766861", "step": 1900, "epoch": 2 }, { "type": "loss", "content": 0.01604650542140007, "timestamp": "2025-09-30 22:15:30.777383", "step": 1901, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:30.809507", "step": 1901, "epoch": 2 }, { "type": "loss", "content": 0.009455199353396893, "timestamp": "2025-09-30 22:15:30.820647", "step": 1902, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:30.861685", "step": 1902, "epoch": 2 }, { "type": "loss", "content": 0.012455038726329803, "timestamp": "2025-09-30 22:15:30.868948", "step": 1903, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:30.900470", "step": 1903, "epoch": 2 }, { "type": "loss", "content": 0.02439979650080204, "timestamp": "2025-09-30 22:15:30.928618", "step": 1904, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:30.966569", "step": 1904, "epoch": 2 }, { "type": "loss", "content": 0.007738007232546806, "timestamp": "2025-09-30 22:15:30.971320", "step": 1905, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:31.007374", "step": 1905, "epoch": 2 }, { "type": "loss", "content": 0.011472243815660477, "timestamp": "2025-09-30 22:15:31.014754", "step": 1906, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:31.048064", "step": 1906, "epoch": 2 }, { "type": "loss", "content": 0.011051525361835957, "timestamp": "2025-09-30 22:15:31.060384", "step": 1907, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:31.101469", "step": 1907, "epoch": 2 }, { "type": "loss", "content": 0.0075341323390603065, "timestamp": "2025-09-30 22:15:31.136321", "step": 1908, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:31.177066", "step": 1908, "epoch": 2 }, { "type": "loss", "content": 0.00750238262116909, "timestamp": "2025-09-30 22:15:31.190405", "step": 1909, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:31.225961", "step": 1909, "epoch": 2 }, { "type": "loss", "content": 0.01275754626840353, "timestamp": "2025-09-30 22:15:31.233322", "step": 1910, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:31.265878", "step": 1910, "epoch": 2 }, { "type": "loss", "content": 0.011358684860169888, "timestamp": "2025-09-30 22:15:31.278196", "step": 1911, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:31.311841", "step": 1911, "epoch": 2 }, { "type": "loss", "content": 0.008774158544838428, "timestamp": "2025-09-30 22:15:31.345127", "step": 1912, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:31.387727", "step": 1912, "epoch": 2 }, { "type": "loss", "content": 0.011165403760969639, "timestamp": "2025-09-30 22:15:31.396360", "step": 1913, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:31.434477", "step": 1913, "epoch": 2 }, { "type": "loss", "content": 0.014243747107684612, "timestamp": "2025-09-30 22:15:31.448307", "step": 1914, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:31.481873", "step": 1914, "epoch": 2 }, { "type": "loss", "content": 0.012929615564644337, "timestamp": "2025-09-30 22:15:31.489764", "step": 1915, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:31.523105", "step": 1915, "epoch": 2 }, { "type": "loss", "content": 0.007347370497882366, "timestamp": "2025-09-30 22:15:31.556356", "step": 1916, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:31.594760", "step": 1916, "epoch": 2 }, { "type": "loss", "content": 0.014166549779474735, "timestamp": "2025-09-30 22:15:31.602674", "step": 1917, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:31.637824", "step": 1917, "epoch": 2 }, { "type": "loss", "content": 0.009590953588485718, "timestamp": "2025-09-30 22:15:31.650371", "step": 1918, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:31.685602", "step": 1918, "epoch": 2 }, { "type": "loss", "content": 0.009673715569078922, "timestamp": "2025-09-30 22:15:31.696631", "step": 1919, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:31.734965", "step": 1919, "epoch": 2 }, { "type": "loss", "content": 0.012877479195594788, "timestamp": "2025-09-30 22:15:31.763382", "step": 1920, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:31.796360", "step": 1920, "epoch": 2 }, { "type": "loss", "content": 0.011299233883619308, "timestamp": "2025-09-30 22:15:31.806052", "step": 1921, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:31.838739", "step": 1921, "epoch": 2 }, { "type": "loss", "content": 0.011492876335978508, "timestamp": "2025-09-30 22:15:31.851293", "step": 1922, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:31.888932", "step": 1922, "epoch": 2 }, { "type": "loss", "content": 0.01344871986657381, "timestamp": "2025-09-30 22:15:31.900198", "step": 1923, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:31.933676", "step": 1923, "epoch": 2 }, { "type": "loss", "content": 0.017432425171136856, "timestamp": "2025-09-30 22:15:31.961503", "step": 1924, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:31.994546", "step": 1924, "epoch": 2 }, { "type": "loss", "content": 0.01488548144698143, "timestamp": "2025-09-30 22:15:32.002526", "step": 1925, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:32.039100", "step": 1925, "epoch": 2 }, { "type": "loss", "content": 0.00806745421141386, "timestamp": "2025-09-30 22:15:32.046853", "step": 1926, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:32.081888", "step": 1926, "epoch": 2 }, { "type": "loss", "content": 0.010180181823670864, "timestamp": "2025-09-30 22:15:32.095269", "step": 1927, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:15:32.144012", "step": 1927, "epoch": 2 }, { "type": "loss", "content": 0.004572300240397453, "timestamp": "2025-09-30 22:15:32.181160", "step": 1928, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:32.217812", "step": 1928, "epoch": 2 }, { "type": "loss", "content": 0.017586616799235344, "timestamp": "2025-09-30 22:15:32.230435", "step": 1929, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:32.267724", "step": 1929, "epoch": 2 }, { "type": "loss", "content": 0.008072668686509132, "timestamp": "2025-09-30 22:15:32.281696", "step": 1930, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:32.318774", "step": 1930, "epoch": 2 }, { "type": "loss", "content": 0.007447035517543554, "timestamp": "2025-09-30 22:15:32.332628", "step": 1931, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:32.375710", "step": 1931, "epoch": 2 }, { "type": "loss", "content": 0.006811977364122868, "timestamp": "2025-09-30 22:15:32.409914", "step": 1932, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:32.450362", "step": 1932, "epoch": 2 }, { "type": "loss", "content": 0.01102065946906805, "timestamp": "2025-09-30 22:15:32.463483", "step": 1933, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:32.504920", "step": 1933, "epoch": 2 }, { "type": "loss", "content": 0.015133448876440525, "timestamp": "2025-09-30 22:15:32.511624", "step": 1934, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:32.551127", "step": 1934, "epoch": 2 }, { "type": "loss", "content": 0.007282888051122427, "timestamp": "2025-09-30 22:15:32.561367", "step": 1935, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:32.597859", "step": 1935, "epoch": 2 }, { "type": "loss", "content": 0.012841354124248028, "timestamp": "2025-09-30 22:15:32.626608", "step": 1936, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:32.669019", "step": 1936, "epoch": 2 }, { "type": "loss", "content": 0.012392260134220123, "timestamp": "2025-09-30 22:15:32.676924", "step": 1937, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:32.718886", "step": 1937, "epoch": 2 }, { "type": "loss", "content": 0.011032551527023315, "timestamp": "2025-09-30 22:15:32.730017", "step": 1938, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:32.785612", "step": 1938, "epoch": 2 }, { "type": "loss", "content": 0.012614678591489792, "timestamp": "2025-09-30 22:15:32.799365", "step": 1939, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:32.837506", "step": 1939, "epoch": 2 }, { "type": "loss", "content": 0.00897765439003706, "timestamp": "2025-09-30 22:15:32.870617", "step": 1940, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:32.906731", "step": 1940, "epoch": 2 }, { "type": "loss", "content": 0.023669099435210228, "timestamp": "2025-09-30 22:15:32.914709", "step": 1941, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:32.951958", "step": 1941, "epoch": 2 }, { "type": "loss", "content": 0.014903804287314415, "timestamp": "2025-09-30 22:15:32.964526", "step": 1942, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:33.003799", "step": 1942, "epoch": 2 }, { "type": "loss", "content": 0.013701890595257282, "timestamp": "2025-09-30 22:15:33.017484", "step": 1943, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:33.054319", "step": 1943, "epoch": 2 }, { "type": "loss", "content": 0.01658545434474945, "timestamp": "2025-09-30 22:15:33.088543", "step": 1944, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:33.125388", "step": 1944, "epoch": 2 }, { "type": "loss", "content": 0.0041729118674993515, "timestamp": "2025-09-30 22:15:33.138406", "step": 1945, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:33.172919", "step": 1945, "epoch": 2 }, { "type": "loss", "content": 0.008692542091012001, "timestamp": "2025-09-30 22:15:33.185242", "step": 1946, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:33.225723", "step": 1946, "epoch": 2 }, { "type": "loss", "content": 0.008979223668575287, "timestamp": "2025-09-30 22:15:33.238150", "step": 1947, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:33.270928", "step": 1947, "epoch": 2 }, { "type": "loss", "content": 0.010823034681379795, "timestamp": "2025-09-30 22:15:33.299366", "step": 1948, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:33.336355", "step": 1948, "epoch": 2 }, { "type": "loss", "content": 0.006081897299736738, "timestamp": "2025-09-30 22:15:33.344469", "step": 1949, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:33.392724", "step": 1949, "epoch": 2 }, { "type": "loss", "content": 0.0050694928504526615, "timestamp": "2025-09-30 22:15:33.406606", "step": 1950, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:33.441887", "step": 1950, "epoch": 2 }, { "type": "loss", "content": 0.0053678094409406185, "timestamp": "2025-09-30 22:15:33.454471", "step": 1951, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:33.488182", "step": 1951, "epoch": 2 }, { "type": "loss", "content": 0.014238450676202774, "timestamp": "2025-09-30 22:15:33.519617", "step": 1952, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:33.552277", "step": 1952, "epoch": 2 }, { "type": "loss", "content": 0.00962776318192482, "timestamp": "2025-09-30 22:15:33.557629", "step": 1953, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:33.591179", "step": 1953, "epoch": 2 }, { "type": "loss", "content": 0.019318213686347008, "timestamp": "2025-09-30 22:15:33.603697", "step": 1954, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:15:33.658879", "step": 1954, "epoch": 2 }, { "type": "loss", "content": 0.009837604127824306, "timestamp": "2025-09-30 22:15:33.675030", "step": 1955, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:15:36.132305", "step": 1955, "epoch": 2 }, { "type": "pplx", "content": 5.504560427648865, "timestamp": "2025-09-30 22:15:36.135356", "step": 1955, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:36.166512", "step": 1955, "epoch": 2 }, { "type": "loss", "content": 0.01425859797745943, "timestamp": "2025-09-30 22:15:36.199266", "step": 1956, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:36.240897", "step": 1956, "epoch": 2 }, { "type": "loss", "content": 0.008966123685240746, "timestamp": "2025-09-30 22:15:36.254250", "step": 1957, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:36.287770", "step": 1957, "epoch": 2 }, { "type": "loss", "content": 0.0134968226775527, "timestamp": "2025-09-30 22:15:36.295764", "step": 1958, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:36.334086", "step": 1958, "epoch": 2 }, { "type": "loss", "content": 0.025526832789182663, "timestamp": "2025-09-30 22:15:36.340977", "step": 1959, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:36.390284", "step": 1959, "epoch": 2 }, { "type": "loss", "content": 0.01609751395881176, "timestamp": "2025-09-30 22:15:36.426974", "step": 1960, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:36.458804", "step": 1960, "epoch": 2 }, { "type": "loss", "content": 0.0084404731169343, "timestamp": "2025-09-30 22:15:36.466841", "step": 1961, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:36.503477", "step": 1961, "epoch": 2 }, { "type": "loss", "content": 0.01829015463590622, "timestamp": "2025-09-30 22:15:36.515770", "step": 1962, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:36.548569", "step": 1962, "epoch": 2 }, { "type": "loss", "content": 0.019106082618236542, "timestamp": "2025-09-30 22:15:36.559766", "step": 1963, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:36.598885", "step": 1963, "epoch": 2 }, { "type": "loss", "content": 0.007889645174145699, "timestamp": "2025-09-30 22:15:36.633629", "step": 1964, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:36.672751", "step": 1964, "epoch": 2 }, { "type": "loss", "content": 0.014179641380906105, "timestamp": "2025-09-30 22:15:36.685856", "step": 1965, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:36.725432", "step": 1965, "epoch": 2 }, { "type": "loss", "content": 0.010651156306266785, "timestamp": "2025-09-30 22:15:36.737992", "step": 1966, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:36.778377", "step": 1966, "epoch": 2 }, { "type": "loss", "content": 0.014606158249080181, "timestamp": "2025-09-30 22:15:36.792238", "step": 1967, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:36.838165", "step": 1967, "epoch": 2 }, { "type": "loss", "content": 0.006826434750109911, "timestamp": "2025-09-30 22:15:36.874893", "step": 1968, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:36.919277", "step": 1968, "epoch": 2 }, { "type": "loss", "content": 0.006176867987960577, "timestamp": "2025-09-30 22:15:36.934738", "step": 1969, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:15:36.979028", "step": 1969, "epoch": 2 }, { "type": "loss", "content": 0.007977073080837727, "timestamp": "2025-09-30 22:15:36.994790", "step": 1970, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:37.029441", "step": 1970, "epoch": 2 }, { "type": "loss", "content": 0.009346827864646912, "timestamp": "2025-09-30 22:15:37.040201", "step": 1971, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 560 ], "flops": 16611393146432 }, "timestamp": "2025-09-30 22:15:37.090937", "step": 1971, "epoch": 2 }, { "type": "loss", "content": 0.01219925656914711, "timestamp": "2025-09-30 22:15:37.131111", "step": 1972, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:15:37.177297", "step": 1972, "epoch": 2 }, { "type": "loss", "content": 0.008181479759514332, "timestamp": "2025-09-30 22:15:37.193935", "step": 1973, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:37.235947", "step": 1973, "epoch": 2 }, { "type": "loss", "content": 0.00793197751045227, "timestamp": "2025-09-30 22:15:37.250047", "step": 1974, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:37.285881", "step": 1974, "epoch": 2 }, { "type": "loss", "content": 0.011092676781117916, "timestamp": "2025-09-30 22:15:37.298269", "step": 1975, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:37.336821", "step": 1975, "epoch": 2 }, { "type": "loss", "content": 0.012023183517158031, "timestamp": "2025-09-30 22:15:37.371036", "step": 1976, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:15:37.414944", "step": 1976, "epoch": 2 }, { "type": "loss", "content": 0.0072458055801689625, "timestamp": "2025-09-30 22:15:37.431648", "step": 1977, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-30 22:15:37.476108", "step": 1977, "epoch": 2 }, { "type": "loss", "content": 0.007181333377957344, "timestamp": "2025-09-30 22:15:37.493683", "step": 1978, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:37.529952", "step": 1978, "epoch": 2 }, { "type": "loss", "content": 0.01450754888355732, "timestamp": "2025-09-30 22:15:37.543686", "step": 1979, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:37.576735", "step": 1979, "epoch": 2 }, { "type": "loss", "content": 0.0155011722818017, "timestamp": "2025-09-30 22:15:37.604843", "step": 1980, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:37.639143", "step": 1980, "epoch": 2 }, { "type": "loss", "content": 0.00863499566912651, "timestamp": "2025-09-30 22:15:37.646961", "step": 1981, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:37.681276", "step": 1981, "epoch": 2 }, { "type": "loss", "content": 0.011826743371784687, "timestamp": "2025-09-30 22:15:37.693853", "step": 1982, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:37.726685", "step": 1982, "epoch": 2 }, { "type": "loss", "content": 0.014029378071427345, "timestamp": "2025-09-30 22:15:37.737793", "step": 1983, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:37.772111", "step": 1983, "epoch": 2 }, { "type": "loss", "content": 0.019402116537094116, "timestamp": "2025-09-30 22:15:37.803136", "step": 1984, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:37.835752", "step": 1984, "epoch": 2 }, { "type": "loss", "content": 0.009639054536819458, "timestamp": "2025-09-30 22:15:37.841505", "step": 1985, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:37.889704", "step": 1985, "epoch": 2 }, { "type": "loss", "content": 0.023592282086610794, "timestamp": "2025-09-30 22:15:37.903073", "step": 1986, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:15:37.944177", "step": 1986, "epoch": 2 }, { "type": "loss", "content": 0.011838329955935478, "timestamp": "2025-09-30 22:15:37.960263", "step": 1987, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:37.998669", "step": 1987, "epoch": 2 }, { "type": "loss", "content": 0.012354640290141106, "timestamp": "2025-09-30 22:15:38.027259", "step": 1988, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:38.066402", "step": 1988, "epoch": 2 }, { "type": "loss", "content": 0.01696304976940155, "timestamp": "2025-09-30 22:15:38.074365", "step": 1989, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:38.112169", "step": 1989, "epoch": 2 }, { "type": "loss", "content": 0.009608439169824123, "timestamp": "2025-09-30 22:15:38.120173", "step": 1990, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:38.157074", "step": 1990, "epoch": 2 }, { "type": "loss", "content": 0.014672442339360714, "timestamp": "2025-09-30 22:15:38.164668", "step": 1991, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:38.199989", "step": 1991, "epoch": 2 }, { "type": "loss", "content": 0.019588777795433998, "timestamp": "2025-09-30 22:15:38.227949", "step": 1992, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:38.263627", "step": 1992, "epoch": 2 }, { "type": "loss", "content": 0.010438477620482445, "timestamp": "2025-09-30 22:15:38.274166", "step": 1993, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:38.310768", "step": 1993, "epoch": 2 }, { "type": "loss", "content": 0.02435128763318062, "timestamp": "2025-09-30 22:15:38.322005", "step": 1994, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:38.358400", "step": 1994, "epoch": 2 }, { "type": "loss", "content": 0.010515746660530567, "timestamp": "2025-09-30 22:15:38.369585", "step": 1995, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:38.414287", "step": 1995, "epoch": 2 }, { "type": "loss", "content": 0.012999563477933407, "timestamp": "2025-09-30 22:15:38.449136", "step": 1996, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:38.490885", "step": 1996, "epoch": 2 }, { "type": "loss", "content": 0.012872045859694481, "timestamp": "2025-09-30 22:15:38.501572", "step": 1997, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:38.540942", "step": 1997, "epoch": 2 }, { "type": "loss", "content": 0.007690249476581812, "timestamp": "2025-09-30 22:15:38.554644", "step": 1998, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:38.597112", "step": 1998, "epoch": 2 }, { "type": "loss", "content": 0.013428461737930775, "timestamp": "2025-09-30 22:15:38.610777", "step": 1999, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:15:38.651793", "step": 1999, "epoch": 2 }, { "type": "loss", "content": 0.010164592415094376, "timestamp": "2025-09-30 22:15:38.688301", "step": 2000, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 2000", "timestamp": "2025-09-30 22:15:43.740044", "step": 2000, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:43.778379", "step": 2000, "epoch": 2 }, { "type": "loss", "content": 0.010619484819471836, "timestamp": "2025-09-30 22:15:43.786307", "step": 2001, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:43.828402", "step": 2001, "epoch": 2 }, { "type": "loss", "content": 0.006324365269392729, "timestamp": "2025-09-30 22:15:43.842110", "step": 2002, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:43.877915", "step": 2002, "epoch": 2 }, { "type": "loss", "content": 0.023312492296099663, "timestamp": "2025-09-30 22:15:43.889963", "step": 2003, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:43.930637", "step": 2003, "epoch": 2 }, { "type": "loss", "content": 0.010721305385231972, "timestamp": "2025-09-30 22:15:43.965423", "step": 2004, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:44.000396", "step": 2004, "epoch": 2 }, { "type": "loss", "content": 0.011308002285659313, "timestamp": "2025-09-30 22:15:44.011505", "step": 2005, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:44.055880", "step": 2005, "epoch": 2 }, { "type": "loss", "content": 0.010342972353100777, "timestamp": "2025-09-30 22:15:44.068438", "step": 2006, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:44.111007", "step": 2006, "epoch": 2 }, { "type": "loss", "content": 0.012661411426961422, "timestamp": "2025-09-30 22:15:44.122162", "step": 2007, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:44.158076", "step": 2007, "epoch": 2 }, { "type": "loss", "content": 0.010892446152865887, "timestamp": "2025-09-30 22:15:44.191615", "step": 2008, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:44.237328", "step": 2008, "epoch": 2 }, { "type": "loss", "content": 0.007492191158235073, "timestamp": "2025-09-30 22:15:44.247882", "step": 2009, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:44.284504", "step": 2009, "epoch": 2 }, { "type": "loss", "content": 0.01097020972520113, "timestamp": "2025-09-30 22:15:44.294799", "step": 2010, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:44.334542", "step": 2010, "epoch": 2 }, { "type": "loss", "content": 0.008936642669141293, "timestamp": "2025-09-30 22:15:44.346334", "step": 2011, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:44.384345", "step": 2011, "epoch": 2 }, { "type": "loss", "content": 0.012820220552384853, "timestamp": "2025-09-30 22:15:44.413018", "step": 2012, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:44.447682", "step": 2012, "epoch": 2 }, { "type": "loss", "content": 0.01265409030020237, "timestamp": "2025-09-30 22:15:44.453404", "step": 2013, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:44.500827", "step": 2013, "epoch": 2 }, { "type": "loss", "content": 0.010327492840588093, "timestamp": "2025-09-30 22:15:44.508869", "step": 2014, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:44.554990", "step": 2014, "epoch": 2 }, { "type": "loss", "content": 0.006490873172879219, "timestamp": "2025-09-30 22:15:44.568660", "step": 2015, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:44.606746", "step": 2015, "epoch": 2 }, { "type": "loss", "content": 0.010052801109850407, "timestamp": "2025-09-30 22:15:44.635281", "step": 2016, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:44.674941", "step": 2016, "epoch": 2 }, { "type": "loss", "content": 0.011912785470485687, "timestamp": "2025-09-30 22:15:44.685029", "step": 2017, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:44.719112", "step": 2017, "epoch": 2 }, { "type": "loss", "content": 0.017113978043198586, "timestamp": "2025-09-30 22:15:44.729692", "step": 2018, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:44.779921", "step": 2018, "epoch": 2 }, { "type": "loss", "content": 0.012415301986038685, "timestamp": "2025-09-30 22:15:44.789827", "step": 2019, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:44.830211", "step": 2019, "epoch": 2 }, { "type": "loss", "content": 0.008418025448918343, "timestamp": "2025-09-30 22:15:44.863311", "step": 2020, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:44.910693", "step": 2020, "epoch": 2 }, { "type": "loss", "content": 0.005270734429359436, "timestamp": "2025-09-30 22:15:44.923916", "step": 2021, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:44.957388", "step": 2021, "epoch": 2 }, { "type": "loss", "content": 0.007560055702924728, "timestamp": "2025-09-30 22:15:44.965263", "step": 2022, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:15:45.007256", "step": 2022, "epoch": 2 }, { "type": "loss", "content": 0.004281700123101473, "timestamp": "2025-09-30 22:15:45.023514", "step": 2023, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:45.065677", "step": 2023, "epoch": 2 }, { "type": "loss", "content": 0.011109118349850178, "timestamp": "2025-09-30 22:15:45.099903", "step": 2024, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:45.131605", "step": 2024, "epoch": 2 }, { "type": "loss", "content": 0.007959590293467045, "timestamp": "2025-09-30 22:15:45.142139", "step": 2025, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:45.183590", "step": 2025, "epoch": 2 }, { "type": "loss", "content": 0.010825558565557003, "timestamp": "2025-09-30 22:15:45.194651", "step": 2026, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:45.233414", "step": 2026, "epoch": 2 }, { "type": "loss", "content": 0.016622615978121758, "timestamp": "2025-09-30 22:15:45.244216", "step": 2027, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:45.280922", "step": 2027, "epoch": 2 }, { "type": "loss", "content": 0.011463593691587448, "timestamp": "2025-09-30 22:15:45.314309", "step": 2028, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:15:45.348038", "step": 2028, "epoch": 2 }, { "type": "loss", "content": 0.01033572107553482, "timestamp": "2025-09-30 22:15:45.351397", "step": 2029, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:45.388485", "step": 2029, "epoch": 2 }, { "type": "loss", "content": 0.005977582652121782, "timestamp": "2025-09-30 22:15:45.398966", "step": 2030, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:45.444266", "step": 2030, "epoch": 2 }, { "type": "loss", "content": 0.014716926030814648, "timestamp": "2025-09-30 22:15:45.457959", "step": 2031, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:45.491603", "step": 2031, "epoch": 2 }, { "type": "loss", "content": 0.018717095255851746, "timestamp": "2025-09-30 22:15:45.520145", "step": 2032, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:45.553014", "step": 2032, "epoch": 2 }, { "type": "loss", "content": 0.007365270983427763, "timestamp": "2025-09-30 22:15:45.565605", "step": 2033, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:45.600960", "step": 2033, "epoch": 2 }, { "type": "loss", "content": 0.007374709937721491, "timestamp": "2025-09-30 22:15:45.613565", "step": 2034, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:45.663856", "step": 2034, "epoch": 2 }, { "type": "loss", "content": 0.004602862522006035, "timestamp": "2025-09-30 22:15:45.679679", "step": 2035, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:45.713682", "step": 2035, "epoch": 2 }, { "type": "loss", "content": 0.004512309562414885, "timestamp": "2025-09-30 22:15:45.747098", "step": 2036, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:45.779757", "step": 2036, "epoch": 2 }, { "type": "loss", "content": 0.011239656247198582, "timestamp": "2025-09-30 22:15:45.784395", "step": 2037, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:45.824005", "step": 2037, "epoch": 2 }, { "type": "loss", "content": 0.009968086145818233, "timestamp": "2025-09-30 22:15:45.834472", "step": 2038, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:45.880507", "step": 2038, "epoch": 2 }, { "type": "loss", "content": 0.009855345822870731, "timestamp": "2025-09-30 22:15:45.896390", "step": 2039, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:45.941039", "step": 2039, "epoch": 2 }, { "type": "loss", "content": 0.009853428229689598, "timestamp": "2025-09-30 22:15:45.975583", "step": 2040, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:46.010782", "step": 2040, "epoch": 2 }, { "type": "loss", "content": 0.008677588775753975, "timestamp": "2025-09-30 22:15:46.015533", "step": 2041, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:46.055165", "step": 2041, "epoch": 2 }, { "type": "loss", "content": 0.014559353701770306, "timestamp": "2025-09-30 22:15:46.065783", "step": 2042, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:46.097782", "step": 2042, "epoch": 2 }, { "type": "loss", "content": 0.016621189191937447, "timestamp": "2025-09-30 22:15:46.108366", "step": 2043, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:46.152368", "step": 2043, "epoch": 2 }, { "type": "loss", "content": 0.0088790999725461, "timestamp": "2025-09-30 22:15:46.183643", "step": 2044, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:46.226022", "step": 2044, "epoch": 2 }, { "type": "loss", "content": 0.014385106973350048, "timestamp": "2025-09-30 22:15:46.231534", "step": 2045, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:46.269038", "step": 2045, "epoch": 2 }, { "type": "loss", "content": 0.006793558597564697, "timestamp": "2025-09-30 22:15:46.282426", "step": 2046, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:46.325522", "step": 2046, "epoch": 2 }, { "type": "loss", "content": 0.0046113766729831696, "timestamp": "2025-09-30 22:15:46.339330", "step": 2047, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:46.384990", "step": 2047, "epoch": 2 }, { "type": "loss", "content": 0.0070945583283901215, "timestamp": "2025-09-30 22:15:46.419659", "step": 2048, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:46.456068", "step": 2048, "epoch": 2 }, { "type": "loss", "content": 0.012187566608190536, "timestamp": "2025-09-30 22:15:46.468668", "step": 2049, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:46.502019", "step": 2049, "epoch": 2 }, { "type": "loss", "content": 0.017908932641148567, "timestamp": "2025-09-30 22:15:46.514403", "step": 2050, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:46.558697", "step": 2050, "epoch": 2 }, { "type": "loss", "content": 0.009582750499248505, "timestamp": "2025-09-30 22:15:46.572094", "step": 2051, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:46.617163", "step": 2051, "epoch": 2 }, { "type": "loss", "content": 0.013554844073951244, "timestamp": "2025-09-30 22:15:46.651797", "step": 2052, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:46.692422", "step": 2052, "epoch": 2 }, { "type": "loss", "content": 0.011859928257763386, "timestamp": "2025-09-30 22:15:46.700211", "step": 2053, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:46.742006", "step": 2053, "epoch": 2 }, { "type": "loss", "content": 0.008377709425985813, "timestamp": "2025-09-30 22:15:46.755680", "step": 2054, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:46.796919", "step": 2054, "epoch": 2 }, { "type": "loss", "content": 0.00923876091837883, "timestamp": "2025-09-30 22:15:46.804938", "step": 2055, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:46.851929", "step": 2055, "epoch": 2 }, { "type": "loss", "content": 0.011093209497630596, "timestamp": "2025-09-30 22:15:46.886169", "step": 2056, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:46.922464", "step": 2056, "epoch": 2 }, { "type": "loss", "content": 0.012157139368355274, "timestamp": "2025-09-30 22:15:46.932261", "step": 2057, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:46.967189", "step": 2057, "epoch": 2 }, { "type": "loss", "content": 0.015122108161449432, "timestamp": "2025-09-30 22:15:46.978250", "step": 2058, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:47.012612", "step": 2058, "epoch": 2 }, { "type": "loss", "content": 0.007611238397657871, "timestamp": "2025-09-30 22:15:47.024822", "step": 2059, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:47.063942", "step": 2059, "epoch": 2 }, { "type": "loss", "content": 0.012658152729272842, "timestamp": "2025-09-30 22:15:47.098526", "step": 2060, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:47.142781", "step": 2060, "epoch": 2 }, { "type": "loss", "content": 0.006565988529473543, "timestamp": "2025-09-30 22:15:47.155470", "step": 2061, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:47.201615", "step": 2061, "epoch": 2 }, { "type": "loss", "content": 0.007400591857731342, "timestamp": "2025-09-30 22:15:47.214154", "step": 2062, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:47.255351", "step": 2062, "epoch": 2 }, { "type": "loss", "content": 0.006891261320561171, "timestamp": "2025-09-30 22:15:47.268723", "step": 2063, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:47.310324", "step": 2063, "epoch": 2 }, { "type": "loss", "content": 0.007434625178575516, "timestamp": "2025-09-30 22:15:47.343403", "step": 2064, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:47.378392", "step": 2064, "epoch": 2 }, { "type": "loss", "content": 0.00676254415884614, "timestamp": "2025-09-30 22:15:47.390909", "step": 2065, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:47.427085", "step": 2065, "epoch": 2 }, { "type": "loss", "content": 0.0081375977024436, "timestamp": "2025-09-30 22:15:47.434375", "step": 2066, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:47.474283", "step": 2066, "epoch": 2 }, { "type": "loss", "content": 0.008514241315424442, "timestamp": "2025-09-30 22:15:47.486782", "step": 2067, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:47.522766", "step": 2067, "epoch": 2 }, { "type": "loss", "content": 0.009845650754868984, "timestamp": "2025-09-30 22:15:47.551517", "step": 2068, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:47.590783", "step": 2068, "epoch": 2 }, { "type": "loss", "content": 0.011821952648460865, "timestamp": "2025-09-30 22:15:47.600939", "step": 2069, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:47.639504", "step": 2069, "epoch": 2 }, { "type": "loss", "content": 0.009292146191000938, "timestamp": "2025-09-30 22:15:47.650381", "step": 2070, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:15:50.188319", "step": 2070, "epoch": 2 }, { "type": "pplx", "content": 5.5781301359781965, "timestamp": "2025-09-30 22:15:50.191016", "step": 2070, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:50.222991", "step": 2070, "epoch": 2 }, { "type": "loss", "content": 0.014613412320613861, "timestamp": "2025-09-30 22:15:50.229575", "step": 2071, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:15:50.281375", "step": 2071, "epoch": 2 }, { "type": "loss", "content": 0.005937773268669844, "timestamp": "2025-09-30 22:15:50.319265", "step": 2072, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:50.365232", "step": 2072, "epoch": 2 }, { "type": "loss", "content": 0.005999124608933926, "timestamp": "2025-09-30 22:15:50.377850", "step": 2073, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:50.411882", "step": 2073, "epoch": 2 }, { "type": "loss", "content": 0.01658693514764309, "timestamp": "2025-09-30 22:15:50.424163", "step": 2074, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:50.456708", "step": 2074, "epoch": 2 }, { "type": "loss", "content": 0.008094480261206627, "timestamp": "2025-09-30 22:15:50.464652", "step": 2075, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:50.501347", "step": 2075, "epoch": 2 }, { "type": "loss", "content": 0.013593686744570732, "timestamp": "2025-09-30 22:15:50.534533", "step": 2076, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:50.569164", "step": 2076, "epoch": 2 }, { "type": "loss", "content": 0.01685952953994274, "timestamp": "2025-09-30 22:15:50.577215", "step": 2077, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:50.616449", "step": 2077, "epoch": 2 }, { "type": "loss", "content": 0.0064816963858902454, "timestamp": "2025-09-30 22:15:50.629028", "step": 2078, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:50.664593", "step": 2078, "epoch": 2 }, { "type": "loss", "content": 0.007426493801176548, "timestamp": "2025-09-30 22:15:50.672763", "step": 2079, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:50.716301", "step": 2079, "epoch": 2 }, { "type": "loss", "content": 0.00575529458001256, "timestamp": "2025-09-30 22:15:50.750505", "step": 2080, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:50.789204", "step": 2080, "epoch": 2 }, { "type": "loss", "content": 0.01651431806385517, "timestamp": "2025-09-30 22:15:50.797865", "step": 2081, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:50.834645", "step": 2081, "epoch": 2 }, { "type": "loss", "content": 0.014004933647811413, "timestamp": "2025-09-30 22:15:50.847041", "step": 2082, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:50.880426", "step": 2082, "epoch": 2 }, { "type": "loss", "content": 0.011181175708770752, "timestamp": "2025-09-30 22:15:50.892776", "step": 2083, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:50.931865", "step": 2083, "epoch": 2 }, { "type": "loss", "content": 0.012345019727945328, "timestamp": "2025-09-30 22:15:50.966103", "step": 2084, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:51.012624", "step": 2084, "epoch": 2 }, { "type": "loss", "content": 0.013612167909741402, "timestamp": "2025-09-30 22:15:51.025648", "step": 2085, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:51.063085", "step": 2085, "epoch": 2 }, { "type": "loss", "content": 0.007797682657837868, "timestamp": "2025-09-30 22:15:51.074215", "step": 2086, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:51.109535", "step": 2086, "epoch": 2 }, { "type": "loss", "content": 0.012879363261163235, "timestamp": "2025-09-30 22:15:51.120552", "step": 2087, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:51.167767", "step": 2087, "epoch": 2 }, { "type": "loss", "content": 0.00713657820597291, "timestamp": "2025-09-30 22:15:51.202388", "step": 2088, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:51.237362", "step": 2088, "epoch": 2 }, { "type": "loss", "content": 0.016538361087441444, "timestamp": "2025-09-30 22:15:51.250340", "step": 2089, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:15:51.297149", "step": 2089, "epoch": 2 }, { "type": "loss", "content": 0.006878055166453123, "timestamp": "2025-09-30 22:15:51.312795", "step": 2090, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:51.352172", "step": 2090, "epoch": 2 }, { "type": "loss", "content": 0.005907298065721989, "timestamp": "2025-09-30 22:15:51.365580", "step": 2091, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:51.399530", "step": 2091, "epoch": 2 }, { "type": "loss", "content": 0.011420510709285736, "timestamp": "2025-09-30 22:15:51.432932", "step": 2092, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:51.479046", "step": 2092, "epoch": 2 }, { "type": "loss", "content": 0.007098886650055647, "timestamp": "2025-09-30 22:15:51.492351", "step": 2093, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:51.541119", "step": 2093, "epoch": 2 }, { "type": "loss", "content": 0.00935671292245388, "timestamp": "2025-09-30 22:15:51.557034", "step": 2094, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:51.595846", "step": 2094, "epoch": 2 }, { "type": "loss", "content": 0.007575173396617174, "timestamp": "2025-09-30 22:15:51.609663", "step": 2095, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:51.654887", "step": 2095, "epoch": 2 }, { "type": "loss", "content": 0.0037557778414338827, "timestamp": "2025-09-30 22:15:51.691722", "step": 2096, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:51.731402", "step": 2096, "epoch": 2 }, { "type": "loss", "content": 0.008648642338812351, "timestamp": "2025-09-30 22:15:51.741255", "step": 2097, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:51.778787", "step": 2097, "epoch": 2 }, { "type": "loss", "content": 0.012735256925225258, "timestamp": "2025-09-30 22:15:51.792154", "step": 2098, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:51.842223", "step": 2098, "epoch": 2 }, { "type": "loss", "content": 0.011567601934075356, "timestamp": "2025-09-30 22:15:51.856004", "step": 2099, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:51.892525", "step": 2099, "epoch": 2 }, { "type": "loss", "content": 0.013985881581902504, "timestamp": "2025-09-30 22:15:51.924484", "step": 2100, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:51.959500", "step": 2100, "epoch": 2 }, { "type": "loss", "content": 0.0130574656650424, "timestamp": "2025-09-30 22:15:51.972646", "step": 2101, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:52.009601", "step": 2101, "epoch": 2 }, { "type": "loss", "content": 0.010273012332618237, "timestamp": "2025-09-30 22:15:52.021922", "step": 2102, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:52.056863", "step": 2102, "epoch": 2 }, { "type": "loss", "content": 0.014140932820737362, "timestamp": "2025-09-30 22:15:52.070271", "step": 2103, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:52.109205", "step": 2103, "epoch": 2 }, { "type": "loss", "content": 0.009167030453681946, "timestamp": "2025-09-30 22:15:52.143894", "step": 2104, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:15:52.187873", "step": 2104, "epoch": 2 }, { "type": "loss", "content": 0.008260817267000675, "timestamp": "2025-09-30 22:15:52.204884", "step": 2105, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:52.246966", "step": 2105, "epoch": 2 }, { "type": "loss", "content": 0.01203217078000307, "timestamp": "2025-09-30 22:15:52.260429", "step": 2106, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:52.295351", "step": 2106, "epoch": 2 }, { "type": "loss", "content": 0.01023252122104168, "timestamp": "2025-09-30 22:15:52.303218", "step": 2107, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:52.338941", "step": 2107, "epoch": 2 }, { "type": "loss", "content": 0.021626289933919907, "timestamp": "2025-09-30 22:15:52.370083", "step": 2108, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:52.409825", "step": 2108, "epoch": 2 }, { "type": "loss", "content": 0.02148636244237423, "timestamp": "2025-09-30 22:15:52.415548", "step": 2109, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:52.454766", "step": 2109, "epoch": 2 }, { "type": "loss", "content": 0.009455446153879166, "timestamp": "2025-09-30 22:15:52.467361", "step": 2110, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:52.505991", "step": 2110, "epoch": 2 }, { "type": "loss", "content": 0.015936443582177162, "timestamp": "2025-09-30 22:15:52.519372", "step": 2111, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:52.554404", "step": 2111, "epoch": 2 }, { "type": "loss", "content": 0.020320961251854897, "timestamp": "2025-09-30 22:15:52.587749", "step": 2112, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:52.625544", "step": 2112, "epoch": 2 }, { "type": "loss", "content": 0.009221428073942661, "timestamp": "2025-09-30 22:15:52.636157", "step": 2113, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:52.678337", "step": 2113, "epoch": 2 }, { "type": "loss", "content": 0.011892154812812805, "timestamp": "2025-09-30 22:15:52.691651", "step": 2114, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:15:52.736693", "step": 2114, "epoch": 2 }, { "type": "loss", "content": 0.008845707401633263, "timestamp": "2025-09-30 22:15:52.753048", "step": 2115, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:52.795751", "step": 2115, "epoch": 2 }, { "type": "loss", "content": 0.007404664997011423, "timestamp": "2025-09-30 22:15:52.832485", "step": 2116, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:52.872791", "step": 2116, "epoch": 2 }, { "type": "loss", "content": 0.007510712370276451, "timestamp": "2025-09-30 22:15:52.886189", "step": 2117, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:52.923463", "step": 2117, "epoch": 2 }, { "type": "loss", "content": 0.009143121540546417, "timestamp": "2025-09-30 22:15:52.933925", "step": 2118, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:15:52.992396", "step": 2118, "epoch": 2 }, { "type": "loss", "content": 0.00436469865962863, "timestamp": "2025-09-30 22:15:53.008592", "step": 2119, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:53.066340", "step": 2119, "epoch": 2 }, { "type": "loss", "content": 0.009797188453376293, "timestamp": "2025-09-30 22:15:53.100866", "step": 2120, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:53.142376", "step": 2120, "epoch": 2 }, { "type": "loss", "content": 0.010304818861186504, "timestamp": "2025-09-30 22:15:53.152152", "step": 2121, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:53.193096", "step": 2121, "epoch": 2 }, { "type": "loss", "content": 0.013395457528531551, "timestamp": "2025-09-30 22:15:53.200720", "step": 2122, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:53.232561", "step": 2122, "epoch": 2 }, { "type": "loss", "content": 0.01645011082291603, "timestamp": "2025-09-30 22:15:53.242841", "step": 2123, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:53.279323", "step": 2123, "epoch": 2 }, { "type": "loss", "content": 0.004526421893388033, "timestamp": "2025-09-30 22:15:53.310425", "step": 2124, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:53.346184", "step": 2124, "epoch": 2 }, { "type": "loss", "content": 0.028871094807982445, "timestamp": "2025-09-30 22:15:53.359303", "step": 2125, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:53.395171", "step": 2125, "epoch": 2 }, { "type": "loss", "content": 0.0040580276399850845, "timestamp": "2025-09-30 22:15:53.405721", "step": 2126, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:53.449254", "step": 2126, "epoch": 2 }, { "type": "loss", "content": 0.011990678496658802, "timestamp": "2025-09-30 22:15:53.463220", "step": 2127, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:53.508104", "step": 2127, "epoch": 2 }, { "type": "loss", "content": 0.01219364907592535, "timestamp": "2025-09-30 22:15:53.541298", "step": 2128, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:15:53.580794", "step": 2128, "epoch": 2 }, { "type": "loss", "content": 0.00998268835246563, "timestamp": "2025-09-30 22:15:53.595920", "step": 2129, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:53.637055", "step": 2129, "epoch": 2 }, { "type": "loss", "content": 0.020302310585975647, "timestamp": "2025-09-30 22:15:53.652954", "step": 2130, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:53.689382", "step": 2130, "epoch": 2 }, { "type": "loss", "content": 0.011316817253828049, "timestamp": "2025-09-30 22:15:53.700462", "step": 2131, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:53.742819", "step": 2131, "epoch": 2 }, { "type": "loss", "content": 0.009340646676719189, "timestamp": "2025-09-30 22:15:53.774119", "step": 2132, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:53.812609", "step": 2132, "epoch": 2 }, { "type": "loss", "content": 0.013567190617322922, "timestamp": "2025-09-30 22:15:53.821467", "step": 2133, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:53.861941", "step": 2133, "epoch": 2 }, { "type": "loss", "content": 0.0061887470073997974, "timestamp": "2025-09-30 22:15:53.875327", "step": 2134, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:53.911908", "step": 2134, "epoch": 2 }, { "type": "loss", "content": 0.008160131052136421, "timestamp": "2025-09-30 22:15:53.925301", "step": 2135, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:53.958650", "step": 2135, "epoch": 2 }, { "type": "loss", "content": 0.011914225295186043, "timestamp": "2025-09-30 22:15:53.991886", "step": 2136, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:54.029081", "step": 2136, "epoch": 2 }, { "type": "loss", "content": 0.00802577380090952, "timestamp": "2025-09-30 22:15:54.037851", "step": 2137, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:54.072938", "step": 2137, "epoch": 2 }, { "type": "loss", "content": 0.008770965039730072, "timestamp": "2025-09-30 22:15:54.085279", "step": 2138, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:54.126509", "step": 2138, "epoch": 2 }, { "type": "loss", "content": 0.008672679774463177, "timestamp": "2025-09-30 22:15:54.133766", "step": 2139, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:54.181090", "step": 2139, "epoch": 2 }, { "type": "loss", "content": 0.0069053624756634235, "timestamp": "2025-09-30 22:15:54.217849", "step": 2140, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:54.252278", "step": 2140, "epoch": 2 }, { "type": "loss", "content": 0.009527964517474174, "timestamp": "2025-09-30 22:15:54.257088", "step": 2141, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:54.292415", "step": 2141, "epoch": 2 }, { "type": "loss", "content": 0.024030277505517006, "timestamp": "2025-09-30 22:15:54.299437", "step": 2142, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:54.341480", "step": 2142, "epoch": 2 }, { "type": "loss", "content": 0.005627688951790333, "timestamp": "2025-09-30 22:15:54.352066", "step": 2143, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:54.393200", "step": 2143, "epoch": 2 }, { "type": "loss", "content": 0.011507662013173103, "timestamp": "2025-09-30 22:15:54.425159", "step": 2144, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:54.460471", "step": 2144, "epoch": 2 }, { "type": "loss", "content": 0.010537424124777317, "timestamp": "2025-09-30 22:15:54.473784", "step": 2145, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:54.510723", "step": 2145, "epoch": 2 }, { "type": "loss", "content": 0.01107259001582861, "timestamp": "2025-09-30 22:15:54.518686", "step": 2146, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:54.555489", "step": 2146, "epoch": 2 }, { "type": "loss", "content": 0.028157521039247513, "timestamp": "2025-09-30 22:15:54.562649", "step": 2147, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:54.605357", "step": 2147, "epoch": 2 }, { "type": "loss", "content": 0.00928823184221983, "timestamp": "2025-09-30 22:15:54.637474", "step": 2148, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:54.671978", "step": 2148, "epoch": 2 }, { "type": "loss", "content": 0.008403152227401733, "timestamp": "2025-09-30 22:15:54.684614", "step": 2149, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:54.718865", "step": 2149, "epoch": 2 }, { "type": "loss", "content": 0.012543005868792534, "timestamp": "2025-09-30 22:15:54.726705", "step": 2150, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:54.761107", "step": 2150, "epoch": 2 }, { "type": "loss", "content": 0.009016967378556728, "timestamp": "2025-09-30 22:15:54.769091", "step": 2151, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:54.805847", "step": 2151, "epoch": 2 }, { "type": "loss", "content": 0.010433612391352654, "timestamp": "2025-09-30 22:15:54.840545", "step": 2152, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:15:54.883286", "step": 2152, "epoch": 2 }, { "type": "loss", "content": 0.00729801319539547, "timestamp": "2025-09-30 22:15:54.898631", "step": 2153, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:15:54.944731", "step": 2153, "epoch": 2 }, { "type": "loss", "content": 0.010827134363353252, "timestamp": "2025-09-30 22:15:54.961814", "step": 2154, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:54.999172", "step": 2154, "epoch": 2 }, { "type": "loss", "content": 0.01179803628474474, "timestamp": "2025-09-30 22:15:55.013013", "step": 2155, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:55.052053", "step": 2155, "epoch": 2 }, { "type": "loss", "content": 0.007486463990062475, "timestamp": "2025-09-30 22:15:55.086874", "step": 2156, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:55.124429", "step": 2156, "epoch": 2 }, { "type": "loss", "content": 0.010275008156895638, "timestamp": "2025-09-30 22:15:55.132553", "step": 2157, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:55.169980", "step": 2157, "epoch": 2 }, { "type": "loss", "content": 0.013125157915055752, "timestamp": "2025-09-30 22:15:55.183777", "step": 2158, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:15:55.219724", "step": 2158, "epoch": 2 }, { "type": "loss", "content": 0.009517863392829895, "timestamp": "2025-09-30 22:15:55.232099", "step": 2159, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:55.275332", "step": 2159, "epoch": 2 }, { "type": "loss", "content": 0.004936881363391876, "timestamp": "2025-09-30 22:15:55.309586", "step": 2160, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:55.351880", "step": 2160, "epoch": 2 }, { "type": "loss", "content": 0.005083846859633923, "timestamp": "2025-09-30 22:15:55.365252", "step": 2161, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:55.402697", "step": 2161, "epoch": 2 }, { "type": "loss", "content": 0.006979496218264103, "timestamp": "2025-09-30 22:15:55.416123", "step": 2162, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:55.453992", "step": 2162, "epoch": 2 }, { "type": "loss", "content": 0.009978152811527252, "timestamp": "2025-09-30 22:15:55.466502", "step": 2163, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:55.507099", "step": 2163, "epoch": 2 }, { "type": "loss", "content": 0.006703498773276806, "timestamp": "2025-09-30 22:15:55.541762", "step": 2164, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:55.574313", "step": 2164, "epoch": 2 }, { "type": "loss", "content": 0.013024583458900452, "timestamp": "2025-09-30 22:15:55.582574", "step": 2165, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:55.619787", "step": 2165, "epoch": 2 }, { "type": "loss", "content": 0.008418618701398373, "timestamp": "2025-09-30 22:15:55.632390", "step": 2166, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:55.668165", "step": 2166, "epoch": 2 }, { "type": "loss", "content": 0.012666082940995693, "timestamp": "2025-09-30 22:15:55.675240", "step": 2167, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:55.710537", "step": 2167, "epoch": 2 }, { "type": "loss", "content": 0.0075226472690701485, "timestamp": "2025-09-30 22:15:55.738639", "step": 2168, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:55.780602", "step": 2168, "epoch": 2 }, { "type": "loss", "content": 0.005574285984039307, "timestamp": "2025-09-30 22:15:55.785544", "step": 2169, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:55.818416", "step": 2169, "epoch": 2 }, { "type": "loss", "content": 0.01113008800894022, "timestamp": "2025-09-30 22:15:55.825793", "step": 2170, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:15:55.865733", "step": 2170, "epoch": 2 }, { "type": "loss", "content": 0.011179173365235329, "timestamp": "2025-09-30 22:15:55.878298", "step": 2171, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:15:55.914286", "step": 2171, "epoch": 2 }, { "type": "loss", "content": 0.010755863040685654, "timestamp": "2025-09-30 22:15:55.946096", "step": 2172, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:55.984802", "step": 2172, "epoch": 2 }, { "type": "loss", "content": 0.004485005047172308, "timestamp": "2025-09-30 22:15:55.997859", "step": 2173, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:56.031636", "step": 2173, "epoch": 2 }, { "type": "loss", "content": 0.008229841478168964, "timestamp": "2025-09-30 22:15:56.042094", "step": 2174, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:56.078451", "step": 2174, "epoch": 2 }, { "type": "loss", "content": 0.007020510733127594, "timestamp": "2025-09-30 22:15:56.092315", "step": 2175, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:15:56.138923", "step": 2175, "epoch": 2 }, { "type": "loss", "content": 0.025375016033649445, "timestamp": "2025-09-30 22:15:56.175499", "step": 2176, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:56.223252", "step": 2176, "epoch": 2 }, { "type": "loss", "content": 0.011891238391399384, "timestamp": "2025-09-30 22:15:56.237795", "step": 2177, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:56.293871", "step": 2177, "epoch": 2 }, { "type": "loss", "content": 0.011667244136333466, "timestamp": "2025-09-30 22:15:56.308653", "step": 2178, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:56.359219", "step": 2178, "epoch": 2 }, { "type": "loss", "content": 0.007750329095870256, "timestamp": "2025-09-30 22:15:56.373232", "step": 2179, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:56.427206", "step": 2179, "epoch": 2 }, { "type": "loss", "content": 0.011612946167588234, "timestamp": "2025-09-30 22:15:56.461448", "step": 2180, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:56.496330", "step": 2180, "epoch": 2 }, { "type": "loss", "content": 0.014608295634388924, "timestamp": "2025-09-30 22:15:56.505829", "step": 2181, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:56.541648", "step": 2181, "epoch": 2 }, { "type": "loss", "content": 0.007489521987736225, "timestamp": "2025-09-30 22:15:56.556018", "step": 2182, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:56.606206", "step": 2182, "epoch": 2 }, { "type": "loss", "content": 0.006555826403200626, "timestamp": "2025-09-30 22:15:56.613440", "step": 2183, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:56.659068", "step": 2183, "epoch": 2 }, { "type": "loss", "content": 0.010146516375243664, "timestamp": "2025-09-30 22:15:56.687947", "step": 2184, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:15:56.735455", "step": 2184, "epoch": 2 }, { "type": "loss", "content": 0.028159474954009056, "timestamp": "2025-09-30 22:15:56.750854", "step": 2185, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:15:59.216342", "step": 2185, "epoch": 2 }, { "type": "pplx", "content": 5.614024903667018, "timestamp": "2025-09-30 22:15:59.218947", "step": 2185, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:59.249474", "step": 2185, "epoch": 2 }, { "type": "loss", "content": 0.007176598068326712, "timestamp": "2025-09-30 22:15:59.260149", "step": 2186, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:15:59.299798", "step": 2186, "epoch": 2 }, { "type": "loss", "content": 0.010663502849638462, "timestamp": "2025-09-30 22:15:59.306654", "step": 2187, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:15:59.342291", "step": 2187, "epoch": 2 }, { "type": "loss", "content": 0.014264430850744247, "timestamp": "2025-09-30 22:15:59.373436", "step": 2188, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:59.407340", "step": 2188, "epoch": 2 }, { "type": "loss", "content": 0.013394519686698914, "timestamp": "2025-09-30 22:15:59.412681", "step": 2189, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:15:59.448096", "step": 2189, "epoch": 2 }, { "type": "loss", "content": 0.01116852555423975, "timestamp": "2025-09-30 22:15:59.455873", "step": 2190, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:15:59.489528", "step": 2190, "epoch": 2 }, { "type": "loss", "content": 0.012576432898640633, "timestamp": "2025-09-30 22:15:59.497397", "step": 2191, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:59.531745", "step": 2191, "epoch": 2 }, { "type": "loss", "content": 0.009694776497781277, "timestamp": "2025-09-30 22:15:59.563663", "step": 2192, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:15:59.598912", "step": 2192, "epoch": 2 }, { "type": "loss", "content": 0.009965450502932072, "timestamp": "2025-09-30 22:15:59.612058", "step": 2193, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:59.646110", "step": 2193, "epoch": 2 }, { "type": "loss", "content": 0.010894293896853924, "timestamp": "2025-09-30 22:15:59.657536", "step": 2194, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:15:59.697841", "step": 2194, "epoch": 2 }, { "type": "loss", "content": 0.00821804627776146, "timestamp": "2025-09-30 22:15:59.709023", "step": 2195, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:59.743982", "step": 2195, "epoch": 2 }, { "type": "loss", "content": 0.01242805365473032, "timestamp": "2025-09-30 22:15:59.778206", "step": 2196, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:59.814437", "step": 2196, "epoch": 2 }, { "type": "loss", "content": 0.012450500391423702, "timestamp": "2025-09-30 22:15:59.827121", "step": 2197, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:15:59.882887", "step": 2197, "epoch": 2 }, { "type": "loss", "content": 0.007693701423704624, "timestamp": "2025-09-30 22:15:59.896598", "step": 2198, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:15:59.936345", "step": 2198, "epoch": 2 }, { "type": "loss", "content": 0.014463446103036404, "timestamp": "2025-09-30 22:15:59.950386", "step": 2199, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:15:59.991831", "step": 2199, "epoch": 2 }, { "type": "loss", "content": 0.01318387035280466, "timestamp": "2025-09-30 22:16:00.026108", "step": 2200, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:00.063593", "step": 2200, "epoch": 2 }, { "type": "loss", "content": 0.004912909120321274, "timestamp": "2025-09-30 22:16:00.073659", "step": 2201, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:00.109365", "step": 2201, "epoch": 2 }, { "type": "loss", "content": 0.011479430831968784, "timestamp": "2025-09-30 22:16:00.121792", "step": 2202, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:00.164778", "step": 2202, "epoch": 2 }, { "type": "loss", "content": 0.011321412399411201, "timestamp": "2025-09-30 22:16:00.172152", "step": 2203, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:16:00.215177", "step": 2203, "epoch": 2 }, { "type": "loss", "content": 0.009902720339596272, "timestamp": "2025-09-30 22:16:00.251864", "step": 2204, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:00.292852", "step": 2204, "epoch": 2 }, { "type": "loss", "content": 0.00927629042416811, "timestamp": "2025-09-30 22:16:00.305966", "step": 2205, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:00.345430", "step": 2205, "epoch": 2 }, { "type": "loss", "content": 0.010016247630119324, "timestamp": "2025-09-30 22:16:00.353378", "step": 2206, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:16:00.405618", "step": 2206, "epoch": 2 }, { "type": "loss", "content": 0.01247417088598013, "timestamp": "2025-09-30 22:16:00.421534", "step": 2207, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:16:00.475589", "step": 2207, "epoch": 2 }, { "type": "loss", "content": 0.007204344030469656, "timestamp": "2025-09-30 22:16:00.513797", "step": 2208, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:00.548636", "step": 2208, "epoch": 2 }, { "type": "loss", "content": 0.017199793830513954, "timestamp": "2025-09-30 22:16:00.558805", "step": 2209, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:00.601264", "step": 2209, "epoch": 2 }, { "type": "loss", "content": 0.012835008092224598, "timestamp": "2025-09-30 22:16:00.613794", "step": 2210, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:00.646429", "step": 2210, "epoch": 2 }, { "type": "loss", "content": 0.009112820960581303, "timestamp": "2025-09-30 22:16:00.654077", "step": 2211, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:00.689320", "step": 2211, "epoch": 2 }, { "type": "loss", "content": 0.012906182557344437, "timestamp": "2025-09-30 22:16:00.722455", "step": 2212, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:00.755148", "step": 2212, "epoch": 2 }, { "type": "loss", "content": 0.005865730345249176, "timestamp": "2025-09-30 22:16:00.760934", "step": 2213, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:00.804911", "step": 2213, "epoch": 2 }, { "type": "loss", "content": 0.01065493281930685, "timestamp": "2025-09-30 22:16:00.812676", "step": 2214, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:00.852881", "step": 2214, "epoch": 2 }, { "type": "loss", "content": 0.009175293147563934, "timestamp": "2025-09-30 22:16:00.860727", "step": 2215, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:00.893745", "step": 2215, "epoch": 2 }, { "type": "loss", "content": 0.009614244103431702, "timestamp": "2025-09-30 22:16:00.925066", "step": 2216, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:00.964192", "step": 2216, "epoch": 2 }, { "type": "loss", "content": 0.011298530735075474, "timestamp": "2025-09-30 22:16:00.974105", "step": 2217, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:01.025615", "step": 2217, "epoch": 2 }, { "type": "loss", "content": 0.009481500834226608, "timestamp": "2025-09-30 22:16:01.038209", "step": 2218, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:01.080015", "step": 2218, "epoch": 2 }, { "type": "loss", "content": 0.008077259175479412, "timestamp": "2025-09-30 22:16:01.093869", "step": 2219, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:01.142383", "step": 2219, "epoch": 2 }, { "type": "loss", "content": 0.006438512355089188, "timestamp": "2025-09-30 22:16:01.171207", "step": 2220, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:01.206086", "step": 2220, "epoch": 2 }, { "type": "loss", "content": 0.010353588499128819, "timestamp": "2025-09-30 22:16:01.211726", "step": 2221, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:01.245007", "step": 2221, "epoch": 2 }, { "type": "loss", "content": 0.006344547960907221, "timestamp": "2025-09-30 22:16:01.257593", "step": 2222, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:01.293379", "step": 2222, "epoch": 2 }, { "type": "loss", "content": 0.010450613684952259, "timestamp": "2025-09-30 22:16:01.301101", "step": 2223, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:01.333219", "step": 2223, "epoch": 2 }, { "type": "loss", "content": 0.00932193361222744, "timestamp": "2025-09-30 22:16:01.366335", "step": 2224, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:01.399437", "step": 2224, "epoch": 2 }, { "type": "loss", "content": 0.005790230818092823, "timestamp": "2025-09-30 22:16:01.404384", "step": 2225, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:01.447074", "step": 2225, "epoch": 2 }, { "type": "loss", "content": 0.012799671851098537, "timestamp": "2025-09-30 22:16:01.459410", "step": 2226, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:01.493176", "step": 2226, "epoch": 2 }, { "type": "loss", "content": 0.004914826713502407, "timestamp": "2025-09-30 22:16:01.505746", "step": 2227, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:01.544020", "step": 2227, "epoch": 2 }, { "type": "loss", "content": 0.006473730318248272, "timestamp": "2025-09-30 22:16:01.578275", "step": 2228, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:01.610405", "step": 2228, "epoch": 2 }, { "type": "loss", "content": 0.0077035753056406975, "timestamp": "2025-09-30 22:16:01.619345", "step": 2229, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:01.652221", "step": 2229, "epoch": 2 }, { "type": "loss", "content": 0.009425118565559387, "timestamp": "2025-09-30 22:16:01.659881", "step": 2230, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:01.695532", "step": 2230, "epoch": 2 }, { "type": "loss", "content": 0.011323267593979836, "timestamp": "2025-09-30 22:16:01.709230", "step": 2231, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:01.745294", "step": 2231, "epoch": 2 }, { "type": "loss", "content": 0.005302715580910444, "timestamp": "2025-09-30 22:16:01.773301", "step": 2232, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:01.810200", "step": 2232, "epoch": 2 }, { "type": "loss", "content": 0.010681116953492165, "timestamp": "2025-09-30 22:16:01.815859", "step": 2233, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:01.859037", "step": 2233, "epoch": 2 }, { "type": "loss", "content": 0.005477715749293566, "timestamp": "2025-09-30 22:16:01.871327", "step": 2234, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:01.904481", "step": 2234, "epoch": 2 }, { "type": "loss", "content": 0.00989870447665453, "timestamp": "2025-09-30 22:16:01.914974", "step": 2235, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:01.955710", "step": 2235, "epoch": 2 }, { "type": "loss", "content": 0.004034838639199734, "timestamp": "2025-09-30 22:16:01.989103", "step": 2236, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:02.039332", "step": 2236, "epoch": 2 }, { "type": "loss", "content": 0.0105787618085742, "timestamp": "2025-09-30 22:16:02.051951", "step": 2237, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:02.092289", "step": 2237, "epoch": 2 }, { "type": "loss", "content": 0.00780834536999464, "timestamp": "2025-09-30 22:16:02.104784", "step": 2238, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:02.137504", "step": 2238, "epoch": 2 }, { "type": "loss", "content": 0.012646148912608624, "timestamp": "2025-09-30 22:16:02.145332", "step": 2239, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:02.183046", "step": 2239, "epoch": 2 }, { "type": "loss", "content": 0.01194890309125185, "timestamp": "2025-09-30 22:16:02.217713", "step": 2240, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:02.262607", "step": 2240, "epoch": 2 }, { "type": "loss", "content": 0.007028935011476278, "timestamp": "2025-09-30 22:16:02.272466", "step": 2241, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:02.313166", "step": 2241, "epoch": 2 }, { "type": "loss", "content": 0.00852537713944912, "timestamp": "2025-09-30 22:16:02.326563", "step": 2242, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:02.364833", "step": 2242, "epoch": 2 }, { "type": "loss", "content": 0.007157263811677694, "timestamp": "2025-09-30 22:16:02.378859", "step": 2243, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:02.413584", "step": 2243, "epoch": 2 }, { "type": "loss", "content": 0.008009511046111584, "timestamp": "2025-09-30 22:16:02.446974", "step": 2244, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:02.481843", "step": 2244, "epoch": 2 }, { "type": "loss", "content": 0.00914499070495367, "timestamp": "2025-09-30 22:16:02.494465", "step": 2245, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:02.528219", "step": 2245, "epoch": 2 }, { "type": "loss", "content": 0.010988660156726837, "timestamp": "2025-09-30 22:16:02.538717", "step": 2246, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:02.573023", "step": 2246, "epoch": 2 }, { "type": "loss", "content": 0.01401505433022976, "timestamp": "2025-09-30 22:16:02.580168", "step": 2247, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:02.615683", "step": 2247, "epoch": 2 }, { "type": "loss", "content": 0.011652003973722458, "timestamp": "2025-09-30 22:16:02.643782", "step": 2248, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:02.688947", "step": 2248, "epoch": 2 }, { "type": "loss", "content": 0.012605683878064156, "timestamp": "2025-09-30 22:16:02.694489", "step": 2249, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:02.731806", "step": 2249, "epoch": 2 }, { "type": "loss", "content": 0.00848052091896534, "timestamp": "2025-09-30 22:16:02.739506", "step": 2250, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:16:02.775989", "step": 2250, "epoch": 2 }, { "type": "loss", "content": 0.021888215094804764, "timestamp": "2025-09-30 22:16:02.779975", "step": 2251, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:02.812840", "step": 2251, "epoch": 2 }, { "type": "loss", "content": 0.01496994961053133, "timestamp": "2025-09-30 22:16:02.842758", "step": 2252, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:16:02.886105", "step": 2252, "epoch": 2 }, { "type": "loss", "content": 0.0088545773178339, "timestamp": "2025-09-30 22:16:02.903469", "step": 2253, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:02.940176", "step": 2253, "epoch": 2 }, { "type": "loss", "content": 0.007000849116593599, "timestamp": "2025-09-30 22:16:02.953897", "step": 2254, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:02.986709", "step": 2254, "epoch": 2 }, { "type": "loss", "content": 0.012423422187566757, "timestamp": "2025-09-30 22:16:02.991155", "step": 2255, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:03.024781", "step": 2255, "epoch": 2 }, { "type": "loss", "content": 0.013098802417516708, "timestamp": "2025-09-30 22:16:03.056567", "step": 2256, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:03.092064", "step": 2256, "epoch": 2 }, { "type": "loss", "content": 0.010979630053043365, "timestamp": "2025-09-30 22:16:03.097146", "step": 2257, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:03.133860", "step": 2257, "epoch": 2 }, { "type": "loss", "content": 0.016253529116511345, "timestamp": "2025-09-30 22:16:03.141728", "step": 2258, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-30 22:16:03.186756", "step": 2258, "epoch": 2 }, { "type": "loss", "content": 0.007928948849439621, "timestamp": "2025-09-30 22:16:03.204484", "step": 2259, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:03.243592", "step": 2259, "epoch": 2 }, { "type": "loss", "content": 0.009120190516114235, "timestamp": "2025-09-30 22:16:03.272204", "step": 2260, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:03.305722", "step": 2260, "epoch": 2 }, { "type": "loss", "content": 0.010486864484846592, "timestamp": "2025-09-30 22:16:03.311318", "step": 2261, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:03.351028", "step": 2261, "epoch": 2 }, { "type": "loss", "content": 0.01721068099141121, "timestamp": "2025-09-30 22:16:03.358965", "step": 2262, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:03.392734", "step": 2262, "epoch": 2 }, { "type": "loss", "content": 0.00959155336022377, "timestamp": "2025-09-30 22:16:03.400660", "step": 2263, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:03.440369", "step": 2263, "epoch": 2 }, { "type": "loss", "content": 0.011247663758695126, "timestamp": "2025-09-30 22:16:03.474966", "step": 2264, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:03.512298", "step": 2264, "epoch": 2 }, { "type": "loss", "content": 0.012087054550647736, "timestamp": "2025-09-30 22:16:03.517598", "step": 2265, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:03.558753", "step": 2265, "epoch": 2 }, { "type": "loss", "content": 0.005484334193170071, "timestamp": "2025-09-30 22:16:03.572482", "step": 2266, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:03.613016", "step": 2266, "epoch": 2 }, { "type": "loss", "content": 0.007422391790896654, "timestamp": "2025-09-30 22:16:03.626383", "step": 2267, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:03.663095", "step": 2267, "epoch": 2 }, { "type": "loss", "content": 0.006608553696423769, "timestamp": "2025-09-30 22:16:03.697659", "step": 2268, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:16:03.739280", "step": 2268, "epoch": 2 }, { "type": "loss", "content": 0.00455620139837265, "timestamp": "2025-09-30 22:16:03.755973", "step": 2269, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:03.789195", "step": 2269, "epoch": 2 }, { "type": "loss", "content": 0.009005401283502579, "timestamp": "2025-09-30 22:16:03.799620", "step": 2270, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:03.840153", "step": 2270, "epoch": 2 }, { "type": "loss", "content": 0.03305266425013542, "timestamp": "2025-09-30 22:16:03.848093", "step": 2271, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:03.884066", "step": 2271, "epoch": 2 }, { "type": "loss", "content": 0.011565910652279854, "timestamp": "2025-09-30 22:16:03.915975", "step": 2272, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:03.949919", "step": 2272, "epoch": 2 }, { "type": "loss", "content": 0.008708333596587181, "timestamp": "2025-09-30 22:16:03.955269", "step": 2273, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:16:03.997173", "step": 2273, "epoch": 2 }, { "type": "loss", "content": 0.00512832123786211, "timestamp": "2025-09-30 22:16:04.013036", "step": 2274, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:04.059062", "step": 2274, "epoch": 2 }, { "type": "loss", "content": 0.010472118854522705, "timestamp": "2025-09-30 22:16:04.073066", "step": 2275, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:04.128173", "step": 2275, "epoch": 2 }, { "type": "loss", "content": 0.009419584646821022, "timestamp": "2025-09-30 22:16:04.162719", "step": 2276, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:04.196702", "step": 2276, "epoch": 2 }, { "type": "loss", "content": 0.005723748356103897, "timestamp": "2025-09-30 22:16:04.205403", "step": 2277, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:04.237453", "step": 2277, "epoch": 2 }, { "type": "loss", "content": 0.008166803047060966, "timestamp": "2025-09-30 22:16:04.242077", "step": 2278, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:04.281982", "step": 2278, "epoch": 2 }, { "type": "loss", "content": 0.01263397466391325, "timestamp": "2025-09-30 22:16:04.294286", "step": 2279, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:04.328337", "step": 2279, "epoch": 2 }, { "type": "loss", "content": 0.010637044906616211, "timestamp": "2025-09-30 22:16:04.360112", "step": 2280, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:04.400038", "step": 2280, "epoch": 2 }, { "type": "loss", "content": 0.013092324137687683, "timestamp": "2025-09-30 22:16:04.412677", "step": 2281, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:04.455526", "step": 2281, "epoch": 2 }, { "type": "loss", "content": 0.009603707119822502, "timestamp": "2025-09-30 22:16:04.463172", "step": 2282, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:04.499443", "step": 2282, "epoch": 2 }, { "type": "loss", "content": 0.013961341232061386, "timestamp": "2025-09-30 22:16:04.510495", "step": 2283, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:04.546979", "step": 2283, "epoch": 2 }, { "type": "loss", "content": 0.00484241358935833, "timestamp": "2025-09-30 22:16:04.578965", "step": 2284, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:04.617418", "step": 2284, "epoch": 2 }, { "type": "loss", "content": 0.014262514188885689, "timestamp": "2025-09-30 22:16:04.627949", "step": 2285, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:04.665288", "step": 2285, "epoch": 2 }, { "type": "loss", "content": 0.022015521302819252, "timestamp": "2025-09-30 22:16:04.673285", "step": 2286, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:04.710185", "step": 2286, "epoch": 2 }, { "type": "loss", "content": 0.011776491068303585, "timestamp": "2025-09-30 22:16:04.722682", "step": 2287, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:04.762171", "step": 2287, "epoch": 2 }, { "type": "loss", "content": 0.010544958524405956, "timestamp": "2025-09-30 22:16:04.796898", "step": 2288, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:04.841237", "step": 2288, "epoch": 2 }, { "type": "loss", "content": 0.003932147286832333, "timestamp": "2025-09-30 22:16:04.854444", "step": 2289, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:04.892166", "step": 2289, "epoch": 2 }, { "type": "loss", "content": 0.011136190965771675, "timestamp": "2025-09-30 22:16:04.906039", "step": 2290, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:16:04.949567", "step": 2290, "epoch": 2 }, { "type": "loss", "content": 0.015524694696068764, "timestamp": "2025-09-30 22:16:04.965867", "step": 2291, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:05.006660", "step": 2291, "epoch": 2 }, { "type": "loss", "content": 0.0072592394426465034, "timestamp": "2025-09-30 22:16:05.040033", "step": 2292, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:05.085470", "step": 2292, "epoch": 2 }, { "type": "loss", "content": 0.009627988561987877, "timestamp": "2025-09-30 22:16:05.098109", "step": 2293, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:16:05.140393", "step": 2293, "epoch": 2 }, { "type": "loss", "content": 0.007930876687169075, "timestamp": "2025-09-30 22:16:05.156283", "step": 2294, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:05.192704", "step": 2294, "epoch": 2 }, { "type": "loss", "content": 0.016360390931367874, "timestamp": "2025-09-30 22:16:05.203121", "step": 2295, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:05.241402", "step": 2295, "epoch": 2 }, { "type": "loss", "content": 0.005407324526458979, "timestamp": "2025-09-30 22:16:05.273448", "step": 2296, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:05.305388", "step": 2296, "epoch": 2 }, { "type": "loss", "content": 0.01181397121399641, "timestamp": "2025-09-30 22:16:05.313278", "step": 2297, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:05.349354", "step": 2297, "epoch": 2 }, { "type": "loss", "content": 0.005481473170220852, "timestamp": "2025-09-30 22:16:05.362681", "step": 2298, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:05.401286", "step": 2298, "epoch": 2 }, { "type": "loss", "content": 0.006777736358344555, "timestamp": "2025-09-30 22:16:05.415292", "step": 2299, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:05.455667", "step": 2299, "epoch": 2 }, { "type": "loss", "content": 0.010990391485393047, "timestamp": "2025-09-30 22:16:05.489098", "step": 2300, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:16:07.986643", "step": 2300, "epoch": 2 }, { "type": "pplx", "content": 5.654332293541132, "timestamp": "2025-09-30 22:16:07.993683", "step": 2300, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:08.027455", "step": 2300, "epoch": 2 }, { "type": "loss", "content": 0.01496883388608694, "timestamp": "2025-09-30 22:16:08.037225", "step": 2301, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:08.084041", "step": 2301, "epoch": 2 }, { "type": "loss", "content": 0.011960986070334911, "timestamp": "2025-09-30 22:16:08.096687", "step": 2302, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:08.137462", "step": 2302, "epoch": 2 }, { "type": "loss", "content": 0.00994796585291624, "timestamp": "2025-09-30 22:16:08.145376", "step": 2303, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:16:08.186949", "step": 2303, "epoch": 2 }, { "type": "loss", "content": 0.010986410081386566, "timestamp": "2025-09-30 22:16:08.223487", "step": 2304, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:16:08.263976", "step": 2304, "epoch": 2 }, { "type": "loss", "content": 0.00654952647164464, "timestamp": "2025-09-30 22:16:08.279375", "step": 2305, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:08.329375", "step": 2305, "epoch": 2 }, { "type": "loss", "content": 0.009799250401556492, "timestamp": "2025-09-30 22:16:08.343208", "step": 2306, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:08.379722", "step": 2306, "epoch": 2 }, { "type": "loss", "content": 0.014504171907901764, "timestamp": "2025-09-30 22:16:08.392324", "step": 2307, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:08.431664", "step": 2307, "epoch": 2 }, { "type": "loss", "content": 0.007755675353109837, "timestamp": "2025-09-30 22:16:08.466561", "step": 2308, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:08.502141", "step": 2308, "epoch": 2 }, { "type": "loss", "content": 0.00974066648632288, "timestamp": "2025-09-30 22:16:08.507056", "step": 2309, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:08.552495", "step": 2309, "epoch": 2 }, { "type": "loss", "content": 0.00655874889343977, "timestamp": "2025-09-30 22:16:08.566227", "step": 2310, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:08.609713", "step": 2310, "epoch": 2 }, { "type": "loss", "content": 0.01651756651699543, "timestamp": "2025-09-30 22:16:08.617716", "step": 2311, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:08.653081", "step": 2311, "epoch": 2 }, { "type": "loss", "content": 0.011428349651396275, "timestamp": "2025-09-30 22:16:08.681551", "step": 2312, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:16:08.731610", "step": 2312, "epoch": 2 }, { "type": "loss", "content": 0.004849064163863659, "timestamp": "2025-09-30 22:16:08.748632", "step": 2313, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:08.786046", "step": 2313, "epoch": 2 }, { "type": "loss", "content": 0.026103388518095016, "timestamp": "2025-09-30 22:16:08.794220", "step": 2314, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:08.838816", "step": 2314, "epoch": 2 }, { "type": "loss", "content": 0.011875770054757595, "timestamp": "2025-09-30 22:16:08.846363", "step": 2315, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:08.883435", "step": 2315, "epoch": 2 }, { "type": "loss", "content": 0.009886275045573711, "timestamp": "2025-09-30 22:16:08.914639", "step": 2316, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:08.949276", "step": 2316, "epoch": 2 }, { "type": "loss", "content": 0.009622431360185146, "timestamp": "2025-09-30 22:16:08.959874", "step": 2317, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:16:09.015050", "step": 2317, "epoch": 2 }, { "type": "loss", "content": 0.007912329398095608, "timestamp": "2025-09-30 22:16:09.031208", "step": 2318, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:09.082762", "step": 2318, "epoch": 2 }, { "type": "loss", "content": 0.012053360231220722, "timestamp": "2025-09-30 22:16:09.096340", "step": 2319, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:09.135134", "step": 2319, "epoch": 2 }, { "type": "loss", "content": 0.011790354736149311, "timestamp": "2025-09-30 22:16:09.168329", "step": 2320, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:09.210180", "step": 2320, "epoch": 2 }, { "type": "loss", "content": 0.013455498032271862, "timestamp": "2025-09-30 22:16:09.218294", "step": 2321, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:09.254726", "step": 2321, "epoch": 2 }, { "type": "loss", "content": 0.007671833038330078, "timestamp": "2025-09-30 22:16:09.266004", "step": 2322, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:09.299708", "step": 2322, "epoch": 2 }, { "type": "loss", "content": 0.008541908115148544, "timestamp": "2025-09-30 22:16:09.307351", "step": 2323, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:09.343828", "step": 2323, "epoch": 2 }, { "type": "loss", "content": 0.010388360358774662, "timestamp": "2025-09-30 22:16:09.377070", "step": 2324, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:09.419433", "step": 2324, "epoch": 2 }, { "type": "loss", "content": 0.004493786953389645, "timestamp": "2025-09-30 22:16:09.432079", "step": 2325, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:09.488396", "step": 2325, "epoch": 2 }, { "type": "loss", "content": 0.01047692820429802, "timestamp": "2025-09-30 22:16:09.500894", "step": 2326, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:09.538722", "step": 2326, "epoch": 2 }, { "type": "loss", "content": 0.008095546625554562, "timestamp": "2025-09-30 22:16:09.552465", "step": 2327, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:09.591892", "step": 2327, "epoch": 2 }, { "type": "loss", "content": 0.006904910784214735, "timestamp": "2025-09-30 22:16:09.623791", "step": 2328, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:09.658613", "step": 2328, "epoch": 2 }, { "type": "loss", "content": 0.0041915723122656345, "timestamp": "2025-09-30 22:16:09.667322", "step": 2329, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:09.705640", "step": 2329, "epoch": 2 }, { "type": "loss", "content": 0.013046212494373322, "timestamp": "2025-09-30 22:16:09.716740", "step": 2330, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:09.750844", "step": 2330, "epoch": 2 }, { "type": "loss", "content": 0.009823589585721493, "timestamp": "2025-09-30 22:16:09.761201", "step": 2331, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:16:09.806873", "step": 2331, "epoch": 2 }, { "type": "loss", "content": 0.00981562864035368, "timestamp": "2025-09-30 22:16:09.843358", "step": 2332, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:09.885675", "step": 2332, "epoch": 2 }, { "type": "loss", "content": 0.012619983404874802, "timestamp": "2025-09-30 22:16:09.898279", "step": 2333, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:09.945767", "step": 2333, "epoch": 2 }, { "type": "loss", "content": 0.007371451240032911, "timestamp": "2025-09-30 22:16:09.959477", "step": 2334, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:09.994393", "step": 2334, "epoch": 2 }, { "type": "loss", "content": 0.012231948785483837, "timestamp": "2025-09-30 22:16:10.002444", "step": 2335, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:10.035472", "step": 2335, "epoch": 2 }, { "type": "loss", "content": 0.007391262799501419, "timestamp": "2025-09-30 22:16:10.066705", "step": 2336, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:10.100360", "step": 2336, "epoch": 2 }, { "type": "loss", "content": 0.007522133179008961, "timestamp": "2025-09-30 22:16:10.111067", "step": 2337, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:10.146506", "step": 2337, "epoch": 2 }, { "type": "loss", "content": 0.0057350401766598225, "timestamp": "2025-09-30 22:16:10.160252", "step": 2338, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:10.205516", "step": 2338, "epoch": 2 }, { "type": "loss", "content": 0.005650083534419537, "timestamp": "2025-09-30 22:16:10.219254", "step": 2339, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:10.257407", "step": 2339, "epoch": 2 }, { "type": "loss", "content": 0.003318364266306162, "timestamp": "2025-09-30 22:16:10.291869", "step": 2340, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:10.328847", "step": 2340, "epoch": 2 }, { "type": "loss", "content": 0.009202542714774609, "timestamp": "2025-09-30 22:16:10.342033", "step": 2341, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:10.378956", "step": 2341, "epoch": 2 }, { "type": "loss", "content": 0.009690223261713982, "timestamp": "2025-09-30 22:16:10.391289", "step": 2342, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:10.425727", "step": 2342, "epoch": 2 }, { "type": "loss", "content": 0.009548195637762547, "timestamp": "2025-09-30 22:16:10.433693", "step": 2343, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:10.476039", "step": 2343, "epoch": 2 }, { "type": "loss", "content": 0.012893835082650185, "timestamp": "2025-09-30 22:16:10.509297", "step": 2344, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:10.545556", "step": 2344, "epoch": 2 }, { "type": "loss", "content": 0.008734777569770813, "timestamp": "2025-09-30 22:16:10.554333", "step": 2345, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:10.588437", "step": 2345, "epoch": 2 }, { "type": "loss", "content": 0.011990435421466827, "timestamp": "2025-09-30 22:16:10.596350", "step": 2346, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:16:10.641052", "step": 2346, "epoch": 2 }, { "type": "loss", "content": 0.008576703257858753, "timestamp": "2025-09-30 22:16:10.656700", "step": 2347, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:10.694763", "step": 2347, "epoch": 2 }, { "type": "loss", "content": 0.008816730231046677, "timestamp": "2025-09-30 22:16:10.723549", "step": 2348, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:10.761325", "step": 2348, "epoch": 2 }, { "type": "loss", "content": 0.008760577067732811, "timestamp": "2025-09-30 22:16:10.774006", "step": 2349, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:10.812614", "step": 2349, "epoch": 2 }, { "type": "loss", "content": 0.008204920217394829, "timestamp": "2025-09-30 22:16:10.823675", "step": 2350, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:10.868150", "step": 2350, "epoch": 2 }, { "type": "loss", "content": 0.004204670432955027, "timestamp": "2025-09-30 22:16:10.881888", "step": 2351, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:10.920760", "step": 2351, "epoch": 2 }, { "type": "loss", "content": 0.0049379183910787106, "timestamp": "2025-09-30 22:16:10.954940", "step": 2352, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:10.994008", "step": 2352, "epoch": 2 }, { "type": "loss", "content": 0.00872363243252039, "timestamp": "2025-09-30 22:16:11.004536", "step": 2353, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:11.036420", "step": 2353, "epoch": 2 }, { "type": "loss", "content": 0.00507052568718791, "timestamp": "2025-09-30 22:16:11.048576", "step": 2354, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:16:11.092561", "step": 2354, "epoch": 2 }, { "type": "loss", "content": 0.0075722322799265385, "timestamp": "2025-09-30 22:16:11.108474", "step": 2355, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:11.150002", "step": 2355, "epoch": 2 }, { "type": "loss", "content": 0.006875215098261833, "timestamp": "2025-09-30 22:16:11.184801", "step": 2356, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:11.222281", "step": 2356, "epoch": 2 }, { "type": "loss", "content": 0.005606391932815313, "timestamp": "2025-09-30 22:16:11.230096", "step": 2357, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:11.272905", "step": 2357, "epoch": 2 }, { "type": "loss", "content": 0.005990367848426104, "timestamp": "2025-09-30 22:16:11.286616", "step": 2358, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:11.321998", "step": 2358, "epoch": 2 }, { "type": "loss", "content": 0.0067135924473404884, "timestamp": "2025-09-30 22:16:11.334311", "step": 2359, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:11.370658", "step": 2359, "epoch": 2 }, { "type": "loss", "content": 0.008387668989598751, "timestamp": "2025-09-30 22:16:11.402586", "step": 2360, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:11.441575", "step": 2360, "epoch": 2 }, { "type": "loss", "content": 0.014556759037077427, "timestamp": "2025-09-30 22:16:11.452505", "step": 2361, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:11.485920", "step": 2361, "epoch": 2 }, { "type": "loss", "content": 0.012365092523396015, "timestamp": "2025-09-30 22:16:11.496208", "step": 2362, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:11.533541", "step": 2362, "epoch": 2 }, { "type": "loss", "content": 0.0046156723983585835, "timestamp": "2025-09-30 22:16:11.544790", "step": 2363, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:11.587643", "step": 2363, "epoch": 2 }, { "type": "loss", "content": 0.005602897610515356, "timestamp": "2025-09-30 22:16:11.615795", "step": 2364, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:11.650319", "step": 2364, "epoch": 2 }, { "type": "loss", "content": 0.012667978182435036, "timestamp": "2025-09-30 22:16:11.655979", "step": 2365, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:11.695462", "step": 2365, "epoch": 2 }, { "type": "loss", "content": 0.012357855215668678, "timestamp": "2025-09-30 22:16:11.702762", "step": 2366, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:11.746803", "step": 2366, "epoch": 2 }, { "type": "loss", "content": 0.009272625669836998, "timestamp": "2025-09-30 22:16:11.760584", "step": 2367, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:11.801286", "step": 2367, "epoch": 2 }, { "type": "loss", "content": 0.006464353296905756, "timestamp": "2025-09-30 22:16:11.830158", "step": 2368, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:11.867584", "step": 2368, "epoch": 2 }, { "type": "loss", "content": 0.006658497266471386, "timestamp": "2025-09-30 22:16:11.880867", "step": 2369, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:11.920448", "step": 2369, "epoch": 2 }, { "type": "loss", "content": 0.00856840517371893, "timestamp": "2025-09-30 22:16:11.933019", "step": 2370, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:11.966895", "step": 2370, "epoch": 2 }, { "type": "loss", "content": 0.010510051622986794, "timestamp": "2025-09-30 22:16:11.974909", "step": 2371, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:12.010697", "step": 2371, "epoch": 2 }, { "type": "loss", "content": 0.014684943482279778, "timestamp": "2025-09-30 22:16:12.043669", "step": 2372, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:12.084295", "step": 2372, "epoch": 2 }, { "type": "loss", "content": 0.015482643619179726, "timestamp": "2025-09-30 22:16:12.097542", "step": 2373, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:12.136340", "step": 2373, "epoch": 2 }, { "type": "loss", "content": 0.0045043122954666615, "timestamp": "2025-09-30 22:16:12.148871", "step": 2374, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:12.191886", "step": 2374, "epoch": 2 }, { "type": "loss", "content": 0.0032745180651545525, "timestamp": "2025-09-30 22:16:12.205656", "step": 2375, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:12.249738", "step": 2375, "epoch": 2 }, { "type": "loss", "content": 0.01642913930118084, "timestamp": "2025-09-30 22:16:12.277673", "step": 2376, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:12.315312", "step": 2376, "epoch": 2 }, { "type": "loss", "content": 0.01349701825529337, "timestamp": "2025-09-30 22:16:12.320074", "step": 2377, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:12.369416", "step": 2377, "epoch": 2 }, { "type": "loss", "content": 0.009943741373717785, "timestamp": "2025-09-30 22:16:12.377247", "step": 2378, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:12.412531", "step": 2378, "epoch": 2 }, { "type": "loss", "content": 0.006427043117582798, "timestamp": "2025-09-30 22:16:12.425852", "step": 2379, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:12.461702", "step": 2379, "epoch": 2 }, { "type": "loss", "content": 0.00699341855943203, "timestamp": "2025-09-30 22:16:12.489860", "step": 2380, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:12.528088", "step": 2380, "epoch": 2 }, { "type": "loss", "content": 0.01376278419047594, "timestamp": "2025-09-30 22:16:12.533124", "step": 2381, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:12.567016", "step": 2381, "epoch": 2 }, { "type": "loss", "content": 0.009683472104370594, "timestamp": "2025-09-30 22:16:12.574638", "step": 2382, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:12.614887", "step": 2382, "epoch": 2 }, { "type": "loss", "content": 0.0067740269005298615, "timestamp": "2025-09-30 22:16:12.628244", "step": 2383, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:12.663627", "step": 2383, "epoch": 2 }, { "type": "loss", "content": 0.011330408044159412, "timestamp": "2025-09-30 22:16:12.692387", "step": 2384, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:12.733478", "step": 2384, "epoch": 2 }, { "type": "loss", "content": 0.007617818657308817, "timestamp": "2025-09-30 22:16:12.746519", "step": 2385, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:12.784572", "step": 2385, "epoch": 2 }, { "type": "loss", "content": 0.009767604991793633, "timestamp": "2025-09-30 22:16:12.798242", "step": 2386, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:12.839657", "step": 2386, "epoch": 2 }, { "type": "loss", "content": 0.01219944842159748, "timestamp": "2025-09-30 22:16:12.853019", "step": 2387, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:12.891201", "step": 2387, "epoch": 2 }, { "type": "loss", "content": 0.009838934056460857, "timestamp": "2025-09-30 22:16:12.925962", "step": 2388, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:12.960997", "step": 2388, "epoch": 2 }, { "type": "loss", "content": 0.006134702358394861, "timestamp": "2025-09-30 22:16:12.970896", "step": 2389, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:13.006589", "step": 2389, "epoch": 2 }, { "type": "loss", "content": 0.011561652645468712, "timestamp": "2025-09-30 22:16:13.017371", "step": 2390, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:13.061650", "step": 2390, "epoch": 2 }, { "type": "loss", "content": 0.009399686008691788, "timestamp": "2025-09-30 22:16:13.074252", "step": 2391, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:13.109015", "step": 2391, "epoch": 2 }, { "type": "loss", "content": 0.00535060977563262, "timestamp": "2025-09-30 22:16:13.142394", "step": 2392, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:13.181106", "step": 2392, "epoch": 2 }, { "type": "loss", "content": 0.01159277930855751, "timestamp": "2025-09-30 22:16:13.189000", "step": 2393, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:13.239405", "step": 2393, "epoch": 2 }, { "type": "loss", "content": 0.011592809110879898, "timestamp": "2025-09-30 22:16:13.246638", "step": 2394, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:13.279490", "step": 2394, "epoch": 2 }, { "type": "loss", "content": 0.010551626794040203, "timestamp": "2025-09-30 22:16:13.286589", "step": 2395, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:13.321047", "step": 2395, "epoch": 2 }, { "type": "loss", "content": 0.009111042134463787, "timestamp": "2025-09-30 22:16:13.348677", "step": 2396, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:13.380692", "step": 2396, "epoch": 2 }, { "type": "loss", "content": 0.009802721440792084, "timestamp": "2025-09-30 22:16:13.386543", "step": 2397, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:13.418794", "step": 2397, "epoch": 2 }, { "type": "loss", "content": 0.01750762201845646, "timestamp": "2025-09-30 22:16:13.425641", "step": 2398, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:16:13.476173", "step": 2398, "epoch": 2 }, { "type": "loss", "content": 0.008682657033205032, "timestamp": "2025-09-30 22:16:13.491744", "step": 2399, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:13.532182", "step": 2399, "epoch": 2 }, { "type": "loss", "content": 0.007306818384677172, "timestamp": "2025-09-30 22:16:13.566773", "step": 2400, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:13.602351", "step": 2400, "epoch": 2 }, { "type": "loss", "content": 0.006612339522689581, "timestamp": "2025-09-30 22:16:13.607719", "step": 2401, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:16:13.647546", "step": 2401, "epoch": 2 }, { "type": "loss", "content": 0.01855228841304779, "timestamp": "2025-09-30 22:16:13.651843", "step": 2402, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:13.684274", "step": 2402, "epoch": 2 }, { "type": "loss", "content": 0.009422802366316319, "timestamp": "2025-09-30 22:16:13.691819", "step": 2403, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:13.735767", "step": 2403, "epoch": 2 }, { "type": "loss", "content": 0.006999382749199867, "timestamp": "2025-09-30 22:16:13.769084", "step": 2404, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:13.806366", "step": 2404, "epoch": 2 }, { "type": "loss", "content": 0.01017130259424448, "timestamp": "2025-09-30 22:16:13.811600", "step": 2405, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:13.846575", "step": 2405, "epoch": 2 }, { "type": "loss", "content": 0.006849296856671572, "timestamp": "2025-09-30 22:16:13.858943", "step": 2406, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:13.898835", "step": 2406, "epoch": 2 }, { "type": "loss", "content": 0.0065921517089009285, "timestamp": "2025-09-30 22:16:13.906770", "step": 2407, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:13.939852", "step": 2407, "epoch": 2 }, { "type": "loss", "content": 0.010219089686870575, "timestamp": "2025-09-30 22:16:13.968598", "step": 2408, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:14.020548", "step": 2408, "epoch": 2 }, { "type": "loss", "content": 0.012665183283388615, "timestamp": "2025-09-30 22:16:14.026229", "step": 2409, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:14.072604", "step": 2409, "epoch": 2 }, { "type": "loss", "content": 0.00628651725128293, "timestamp": "2025-09-30 22:16:14.082962", "step": 2410, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:14.127701", "step": 2410, "epoch": 2 }, { "type": "loss", "content": 0.006621459033340216, "timestamp": "2025-09-30 22:16:14.141400", "step": 2411, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-30 22:16:14.180853", "step": 2411, "epoch": 2 }, { "type": "loss", "content": 0.005125768482685089, "timestamp": "2025-09-30 22:16:14.206457", "step": 2412, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:14.248047", "step": 2412, "epoch": 2 }, { "type": "loss", "content": 0.0072546289302408695, "timestamp": "2025-09-30 22:16:14.255996", "step": 2413, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:14.301296", "step": 2413, "epoch": 2 }, { "type": "loss", "content": 0.01086366269737482, "timestamp": "2025-09-30 22:16:14.308500", "step": 2414, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:14.344791", "step": 2414, "epoch": 2 }, { "type": "loss", "content": 0.007983213290572166, "timestamp": "2025-09-30 22:16:14.358434", "step": 2415, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:16:16.915306", "step": 2415, "epoch": 2 }, { "type": "pplx", "content": 5.737540649916071, "timestamp": "2025-09-30 22:16:16.919280", "step": 2415, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:16.955181", "step": 2415, "epoch": 2 }, { "type": "loss", "content": 0.006403180770576, "timestamp": "2025-09-30 22:16:16.989677", "step": 2416, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:17.022799", "step": 2416, "epoch": 2 }, { "type": "loss", "content": 0.00468442402780056, "timestamp": "2025-09-30 22:16:17.035425", "step": 2417, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:17.082390", "step": 2417, "epoch": 2 }, { "type": "loss", "content": 0.010039767250418663, "timestamp": "2025-09-30 22:16:17.090187", "step": 2418, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:17.121965", "step": 2418, "epoch": 2 }, { "type": "loss", "content": 0.006315943319350481, "timestamp": "2025-09-30 22:16:17.129959", "step": 2419, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:17.166227", "step": 2419, "epoch": 2 }, { "type": "loss", "content": 0.007796914782375097, "timestamp": "2025-09-30 22:16:17.197906", "step": 2420, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:16:17.243771", "step": 2420, "epoch": 2 }, { "type": "loss", "content": 0.008052662014961243, "timestamp": "2025-09-30 22:16:17.259645", "step": 2421, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:17.291402", "step": 2421, "epoch": 2 }, { "type": "loss", "content": 0.015868842601776123, "timestamp": "2025-09-30 22:16:17.302606", "step": 2422, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:17.335296", "step": 2422, "epoch": 2 }, { "type": "loss", "content": 0.009983470663428307, "timestamp": "2025-09-30 22:16:17.347470", "step": 2423, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:17.382111", "step": 2423, "epoch": 2 }, { "type": "loss", "content": 0.00888828095048666, "timestamp": "2025-09-30 22:16:17.410939", "step": 2424, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 576 ], "flops": 17085996872448 }, "timestamp": "2025-09-30 22:16:17.463049", "step": 2424, "epoch": 2 }, { "type": "loss", "content": 0.008122816681861877, "timestamp": "2025-09-30 22:16:17.482311", "step": 2425, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:17.520721", "step": 2425, "epoch": 2 }, { "type": "loss", "content": 0.004961279686540365, "timestamp": "2025-09-30 22:16:17.534678", "step": 2426, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:17.574067", "step": 2426, "epoch": 2 }, { "type": "loss", "content": 0.01577206328511238, "timestamp": "2025-09-30 22:16:17.584434", "step": 2427, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:17.619610", "step": 2427, "epoch": 2 }, { "type": "loss", "content": 0.012107166461646557, "timestamp": "2025-09-30 22:16:17.650878", "step": 2428, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:17.684781", "step": 2428, "epoch": 2 }, { "type": "loss", "content": 0.012150055728852749, "timestamp": "2025-09-30 22:16:17.695378", "step": 2429, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:17.733725", "step": 2429, "epoch": 2 }, { "type": "loss", "content": 0.0029978309758007526, "timestamp": "2025-09-30 22:16:17.747736", "step": 2430, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:17.785934", "step": 2430, "epoch": 2 }, { "type": "loss", "content": 0.005970521830022335, "timestamp": "2025-09-30 22:16:17.799720", "step": 2431, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:17.836452", "step": 2431, "epoch": 2 }, { "type": "loss", "content": 0.014105760492384434, "timestamp": "2025-09-30 22:16:17.864695", "step": 2432, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:17.902411", "step": 2432, "epoch": 2 }, { "type": "loss", "content": 0.008667535148561, "timestamp": "2025-09-30 22:16:17.911159", "step": 2433, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:17.948623", "step": 2433, "epoch": 2 }, { "type": "loss", "content": 0.009693967178463936, "timestamp": "2025-09-30 22:16:17.959544", "step": 2434, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:17.995178", "step": 2434, "epoch": 2 }, { "type": "loss", "content": 0.011925452388823032, "timestamp": "2025-09-30 22:16:18.006317", "step": 2435, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:18.043425", "step": 2435, "epoch": 2 }, { "type": "loss", "content": 0.006588513497263193, "timestamp": "2025-09-30 22:16:18.074604", "step": 2436, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:16:18.127126", "step": 2436, "epoch": 2 }, { "type": "loss", "content": 0.005089726764708757, "timestamp": "2025-09-30 22:16:18.143914", "step": 2437, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:18.177605", "step": 2437, "epoch": 2 }, { "type": "loss", "content": 0.006040696520358324, "timestamp": "2025-09-30 22:16:18.184623", "step": 2438, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:18.220581", "step": 2438, "epoch": 2 }, { "type": "loss", "content": 0.00973731279373169, "timestamp": "2025-09-30 22:16:18.230888", "step": 2439, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:18.264961", "step": 2439, "epoch": 2 }, { "type": "loss", "content": 0.015074980445206165, "timestamp": "2025-09-30 22:16:18.293759", "step": 2440, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:18.329568", "step": 2440, "epoch": 2 }, { "type": "loss", "content": 0.0043279207311570644, "timestamp": "2025-09-30 22:16:18.336144", "step": 2441, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:18.376995", "step": 2441, "epoch": 2 }, { "type": "loss", "content": 0.010119735263288021, "timestamp": "2025-09-30 22:16:18.388059", "step": 2442, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:18.429741", "step": 2442, "epoch": 2 }, { "type": "loss", "content": 0.008410328067839146, "timestamp": "2025-09-30 22:16:18.443218", "step": 2443, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:18.480669", "step": 2443, "epoch": 2 }, { "type": "loss", "content": 0.009942535310983658, "timestamp": "2025-09-30 22:16:18.512748", "step": 2444, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:18.549460", "step": 2444, "epoch": 2 }, { "type": "loss", "content": 0.01868273876607418, "timestamp": "2025-09-30 22:16:18.559537", "step": 2445, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:18.597734", "step": 2445, "epoch": 2 }, { "type": "loss", "content": 0.012855646200478077, "timestamp": "2025-09-30 22:16:18.605047", "step": 2446, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:18.648395", "step": 2446, "epoch": 2 }, { "type": "loss", "content": 0.009789690375328064, "timestamp": "2025-09-30 22:16:18.656230", "step": 2447, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:18.691108", "step": 2447, "epoch": 2 }, { "type": "loss", "content": 0.008820690214633942, "timestamp": "2025-09-30 22:16:18.723033", "step": 2448, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:18.755344", "step": 2448, "epoch": 2 }, { "type": "loss", "content": 0.008334523998200893, "timestamp": "2025-09-30 22:16:18.760621", "step": 2449, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:18.803603", "step": 2449, "epoch": 2 }, { "type": "loss", "content": 0.007933998480439186, "timestamp": "2025-09-30 22:16:18.814007", "step": 2450, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:18.854635", "step": 2450, "epoch": 2 }, { "type": "loss", "content": 0.006995997857302427, "timestamp": "2025-09-30 22:16:18.868573", "step": 2451, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:18.907642", "step": 2451, "epoch": 2 }, { "type": "loss", "content": 0.01130268257111311, "timestamp": "2025-09-30 22:16:18.935684", "step": 2452, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:18.969416", "step": 2452, "epoch": 2 }, { "type": "loss", "content": 0.011828478425741196, "timestamp": "2025-09-30 22:16:18.978069", "step": 2453, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:19.018470", "step": 2453, "epoch": 2 }, { "type": "loss", "content": 0.015309597365558147, "timestamp": "2025-09-30 22:16:19.028841", "step": 2454, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:19.071713", "step": 2454, "epoch": 2 }, { "type": "loss", "content": 0.007825467735528946, "timestamp": "2025-09-30 22:16:19.082826", "step": 2455, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:19.117512", "step": 2455, "epoch": 2 }, { "type": "loss", "content": 0.01192080695182085, "timestamp": "2025-09-30 22:16:19.149548", "step": 2456, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:19.188800", "step": 2456, "epoch": 2 }, { "type": "loss", "content": 0.006193151697516441, "timestamp": "2025-09-30 22:16:19.196221", "step": 2457, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:19.236831", "step": 2457, "epoch": 2 }, { "type": "loss", "content": 0.013730679638683796, "timestamp": "2025-09-30 22:16:19.247849", "step": 2458, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:19.281047", "step": 2458, "epoch": 2 }, { "type": "loss", "content": 0.003911986015737057, "timestamp": "2025-09-30 22:16:19.292332", "step": 2459, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:19.330889", "step": 2459, "epoch": 2 }, { "type": "loss", "content": 0.008487740531563759, "timestamp": "2025-09-30 22:16:19.364271", "step": 2460, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:19.400229", "step": 2460, "epoch": 2 }, { "type": "loss", "content": 0.006258985493332148, "timestamp": "2025-09-30 22:16:19.405799", "step": 2461, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:19.442597", "step": 2461, "epoch": 2 }, { "type": "loss", "content": 0.01047790702432394, "timestamp": "2025-09-30 22:16:19.452943", "step": 2462, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:19.485640", "step": 2462, "epoch": 2 }, { "type": "loss", "content": 0.008732033893465996, "timestamp": "2025-09-30 22:16:19.496708", "step": 2463, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:19.529525", "step": 2463, "epoch": 2 }, { "type": "loss", "content": 0.012635108083486557, "timestamp": "2025-09-30 22:16:19.559028", "step": 2464, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:19.595619", "step": 2464, "epoch": 2 }, { "type": "loss", "content": 0.00821733009070158, "timestamp": "2025-09-30 22:16:19.608629", "step": 2465, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:16:19.651985", "step": 2465, "epoch": 2 }, { "type": "loss", "content": 0.011071404442191124, "timestamp": "2025-09-30 22:16:19.669233", "step": 2466, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:19.707719", "step": 2466, "epoch": 2 }, { "type": "loss", "content": 0.00789080373942852, "timestamp": "2025-09-30 22:16:19.721745", "step": 2467, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:19.760665", "step": 2467, "epoch": 2 }, { "type": "loss", "content": 0.010599637404084206, "timestamp": "2025-09-30 22:16:19.795171", "step": 2468, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:19.828950", "step": 2468, "epoch": 2 }, { "type": "loss", "content": 0.010458008386194706, "timestamp": "2025-09-30 22:16:19.835183", "step": 2469, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:19.873203", "step": 2469, "epoch": 2 }, { "type": "loss", "content": 0.008729826658964157, "timestamp": "2025-09-30 22:16:19.886927", "step": 2470, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:16:19.932519", "step": 2470, "epoch": 2 }, { "type": "loss", "content": 0.0057611926458776, "timestamp": "2025-09-30 22:16:19.949570", "step": 2471, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:19.998869", "step": 2471, "epoch": 2 }, { "type": "loss", "content": 0.00834943912923336, "timestamp": "2025-09-30 22:16:20.033453", "step": 2472, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:20.076278", "step": 2472, "epoch": 2 }, { "type": "loss", "content": 0.00887401681393385, "timestamp": "2025-09-30 22:16:20.089600", "step": 2473, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:16:20.137178", "step": 2473, "epoch": 2 }, { "type": "loss", "content": 0.007680539041757584, "timestamp": "2025-09-30 22:16:20.153494", "step": 2474, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:16:20.197366", "step": 2474, "epoch": 2 }, { "type": "loss", "content": 0.004287196788936853, "timestamp": "2025-09-30 22:16:20.214669", "step": 2475, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:20.252421", "step": 2475, "epoch": 2 }, { "type": "loss", "content": 0.007217390462756157, "timestamp": "2025-09-30 22:16:20.287328", "step": 2476, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:16:20.335581", "step": 2476, "epoch": 2 }, { "type": "loss", "content": 0.0035889248829334974, "timestamp": "2025-09-30 22:16:20.351437", "step": 2477, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:20.384709", "step": 2477, "epoch": 2 }, { "type": "loss", "content": 0.02392013557255268, "timestamp": "2025-09-30 22:16:20.392412", "step": 2478, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:20.426764", "step": 2478, "epoch": 2 }, { "type": "loss", "content": 0.003831626381725073, "timestamp": "2025-09-30 22:16:20.441049", "step": 2479, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:20.478795", "step": 2479, "epoch": 2 }, { "type": "loss", "content": 0.011907713487744331, "timestamp": "2025-09-30 22:16:20.511857", "step": 2480, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:20.548947", "step": 2480, "epoch": 2 }, { "type": "loss", "content": 0.008370657451450825, "timestamp": "2025-09-30 22:16:20.558592", "step": 2481, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:20.591530", "step": 2481, "epoch": 2 }, { "type": "loss", "content": 0.005929028615355492, "timestamp": "2025-09-30 22:16:20.603878", "step": 2482, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:20.637823", "step": 2482, "epoch": 2 }, { "type": "loss", "content": 0.007656347006559372, "timestamp": "2025-09-30 22:16:20.650373", "step": 2483, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:20.683612", "step": 2483, "epoch": 2 }, { "type": "loss", "content": 0.0049138241447508335, "timestamp": "2025-09-30 22:16:20.714766", "step": 2484, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:20.750051", "step": 2484, "epoch": 2 }, { "type": "loss", "content": 0.005719912238419056, "timestamp": "2025-09-30 22:16:20.762751", "step": 2485, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:20.799487", "step": 2485, "epoch": 2 }, { "type": "loss", "content": 0.011933359317481518, "timestamp": "2025-09-30 22:16:20.812828", "step": 2486, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:20.846310", "step": 2486, "epoch": 2 }, { "type": "loss", "content": 0.018113916739821434, "timestamp": "2025-09-30 22:16:20.853880", "step": 2487, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:20.890287", "step": 2487, "epoch": 2 }, { "type": "loss", "content": 0.012025549076497555, "timestamp": "2025-09-30 22:16:20.918608", "step": 2488, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:20.953535", "step": 2488, "epoch": 2 }, { "type": "loss", "content": 0.007998720742762089, "timestamp": "2025-09-30 22:16:20.961927", "step": 2489, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:20.997830", "step": 2489, "epoch": 2 }, { "type": "loss", "content": 0.005165472161024809, "timestamp": "2025-09-30 22:16:21.010097", "step": 2490, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:21.049700", "step": 2490, "epoch": 2 }, { "type": "loss", "content": 0.007589935790747404, "timestamp": "2025-09-30 22:16:21.056953", "step": 2491, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:21.092870", "step": 2491, "epoch": 2 }, { "type": "loss", "content": 0.0076933749951422215, "timestamp": "2025-09-30 22:16:21.126056", "step": 2492, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:21.160510", "step": 2492, "epoch": 2 }, { "type": "loss", "content": 0.0104972580447793, "timestamp": "2025-09-30 22:16:21.166457", "step": 2493, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:21.211090", "step": 2493, "epoch": 2 }, { "type": "loss", "content": 0.013154943473637104, "timestamp": "2025-09-30 22:16:21.221173", "step": 2494, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:21.255937", "step": 2494, "epoch": 2 }, { "type": "loss", "content": 0.011266938410699368, "timestamp": "2025-09-30 22:16:21.262921", "step": 2495, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:21.297006", "step": 2495, "epoch": 2 }, { "type": "loss", "content": 0.011694843880832195, "timestamp": "2025-09-30 22:16:21.328117", "step": 2496, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:21.364606", "step": 2496, "epoch": 2 }, { "type": "loss", "content": 0.006242073141038418, "timestamp": "2025-09-30 22:16:21.372335", "step": 2497, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:21.408912", "step": 2497, "epoch": 2 }, { "type": "loss", "content": 0.014575202949345112, "timestamp": "2025-09-30 22:16:21.421445", "step": 2498, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:21.457475", "step": 2498, "epoch": 2 }, { "type": "loss", "content": 0.006570762023329735, "timestamp": "2025-09-30 22:16:21.467671", "step": 2499, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:21.501720", "step": 2499, "epoch": 2 }, { "type": "loss", "content": 0.0061673750169575214, "timestamp": "2025-09-30 22:16:21.530445", "step": 2500, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 2500", "timestamp": "2025-09-30 22:16:26.366956", "step": 2500, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:26.406129", "step": 2500, "epoch": 2 }, { "type": "loss", "content": 0.010745136067271233, "timestamp": "2025-09-30 22:16:26.410270", "step": 2501, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:26.442760", "step": 2501, "epoch": 2 }, { "type": "loss", "content": 0.00644065672531724, "timestamp": "2025-09-30 22:16:26.449422", "step": 2502, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:26.481597", "step": 2502, "epoch": 2 }, { "type": "loss", "content": 0.016653813421726227, "timestamp": "2025-09-30 22:16:26.489325", "step": 2503, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:26.527490", "step": 2503, "epoch": 2 }, { "type": "loss", "content": 0.011729870922863483, "timestamp": "2025-09-30 22:16:26.556100", "step": 2504, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:26.589616", "step": 2504, "epoch": 2 }, { "type": "loss", "content": 0.006627806928008795, "timestamp": "2025-09-30 22:16:26.597553", "step": 2505, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:26.638820", "step": 2505, "epoch": 2 }, { "type": "loss", "content": 0.005113981198519468, "timestamp": "2025-09-30 22:16:26.651402", "step": 2506, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:26.683533", "step": 2506, "epoch": 2 }, { "type": "loss", "content": 0.004428889602422714, "timestamp": "2025-09-30 22:16:26.688899", "step": 2507, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:26.726642", "step": 2507, "epoch": 2 }, { "type": "loss", "content": 0.025327321141958237, "timestamp": "2025-09-30 22:16:26.754278", "step": 2508, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:26.790840", "step": 2508, "epoch": 2 }, { "type": "loss", "content": 0.01675552688539028, "timestamp": "2025-09-30 22:16:26.800592", "step": 2509, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:26.835688", "step": 2509, "epoch": 2 }, { "type": "loss", "content": 0.0040430487133562565, "timestamp": "2025-09-30 22:16:26.846124", "step": 2510, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:26.886527", "step": 2510, "epoch": 2 }, { "type": "loss", "content": 0.007266952656209469, "timestamp": "2025-09-30 22:16:26.899859", "step": 2511, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:26.937660", "step": 2511, "epoch": 2 }, { "type": "loss", "content": 0.01092456839978695, "timestamp": "2025-09-30 22:16:26.966426", "step": 2512, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:26.998657", "step": 2512, "epoch": 2 }, { "type": "loss", "content": 0.00776576017960906, "timestamp": "2025-09-30 22:16:27.006758", "step": 2513, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:16:27.047301", "step": 2513, "epoch": 2 }, { "type": "loss", "content": 0.009044959209859371, "timestamp": "2025-09-30 22:16:27.063161", "step": 2514, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:27.102847", "step": 2514, "epoch": 2 }, { "type": "loss", "content": 0.005568441469222307, "timestamp": "2025-09-30 22:16:27.116192", "step": 2515, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:27.156933", "step": 2515, "epoch": 2 }, { "type": "loss", "content": 0.009347072802484035, "timestamp": "2025-09-30 22:16:27.191520", "step": 2516, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:27.233377", "step": 2516, "epoch": 2 }, { "type": "loss", "content": 0.008022931404411793, "timestamp": "2025-09-30 22:16:27.246390", "step": 2517, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:27.285188", "step": 2517, "epoch": 2 }, { "type": "loss", "content": 0.008891950361430645, "timestamp": "2025-09-30 22:16:27.297429", "step": 2518, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:27.330607", "step": 2518, "epoch": 2 }, { "type": "loss", "content": 0.00859333761036396, "timestamp": "2025-09-30 22:16:27.338533", "step": 2519, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:27.375467", "step": 2519, "epoch": 2 }, { "type": "loss", "content": 0.005585793871432543, "timestamp": "2025-09-30 22:16:27.409989", "step": 2520, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:27.449868", "step": 2520, "epoch": 2 }, { "type": "loss", "content": 0.010565678589046001, "timestamp": "2025-09-30 22:16:27.462581", "step": 2521, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:27.507750", "step": 2521, "epoch": 2 }, { "type": "loss", "content": 0.01040729507803917, "timestamp": "2025-09-30 22:16:27.521433", "step": 2522, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:27.559216", "step": 2522, "epoch": 2 }, { "type": "loss", "content": 0.006701262202113867, "timestamp": "2025-09-30 22:16:27.565955", "step": 2523, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:27.601023", "step": 2523, "epoch": 2 }, { "type": "loss", "content": 0.005230138543993235, "timestamp": "2025-09-30 22:16:27.629372", "step": 2524, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:16:27.667404", "step": 2524, "epoch": 2 }, { "type": "loss", "content": 0.010266603901982307, "timestamp": "2025-09-30 22:16:27.682462", "step": 2525, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:27.717250", "step": 2525, "epoch": 2 }, { "type": "loss", "content": 0.0036118924617767334, "timestamp": "2025-09-30 22:16:27.727497", "step": 2526, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:27.763559", "step": 2526, "epoch": 2 }, { "type": "loss", "content": 0.007853069342672825, "timestamp": "2025-09-30 22:16:27.770406", "step": 2527, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:27.808173", "step": 2527, "epoch": 2 }, { "type": "loss", "content": 0.00653937179595232, "timestamp": "2025-09-30 22:16:27.839559", "step": 2528, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:27.873332", "step": 2528, "epoch": 2 }, { "type": "loss", "content": 0.010696524754166603, "timestamp": "2025-09-30 22:16:27.878801", "step": 2529, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:27.919553", "step": 2529, "epoch": 2 }, { "type": "loss", "content": 0.014034731313586235, "timestamp": "2025-09-30 22:16:27.931896", "step": 2530, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:16:30.424563", "step": 2530, "epoch": 2 }, { "type": "pplx", "content": 5.654377316625182, "timestamp": "2025-09-30 22:16:30.428687", "step": 2530, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:30.464281", "step": 2530, "epoch": 2 }, { "type": "loss", "content": 0.00823940522968769, "timestamp": "2025-09-30 22:16:30.471104", "step": 2531, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:30.522552", "step": 2531, "epoch": 2 }, { "type": "loss", "content": 0.015429342165589333, "timestamp": "2025-09-30 22:16:30.555856", "step": 2532, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:30.592964", "step": 2532, "epoch": 2 }, { "type": "loss", "content": 0.008980813436210155, "timestamp": "2025-09-30 22:16:30.605934", "step": 2533, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:30.639244", "step": 2533, "epoch": 2 }, { "type": "loss", "content": 0.007303296122699976, "timestamp": "2025-09-30 22:16:30.649415", "step": 2534, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:30.685297", "step": 2534, "epoch": 2 }, { "type": "loss", "content": 0.013391010463237762, "timestamp": "2025-09-30 22:16:30.695787", "step": 2535, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:30.738871", "step": 2535, "epoch": 2 }, { "type": "loss", "content": 0.007490305695682764, "timestamp": "2025-09-30 22:16:30.770795", "step": 2536, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:30.810229", "step": 2536, "epoch": 2 }, { "type": "loss", "content": 0.0018074701074510813, "timestamp": "2025-09-30 22:16:30.817979", "step": 2537, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:30.852843", "step": 2537, "epoch": 2 }, { "type": "loss", "content": 0.012867008335888386, "timestamp": "2025-09-30 22:16:30.859920", "step": 2538, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:30.893288", "step": 2538, "epoch": 2 }, { "type": "loss", "content": 0.00832181703299284, "timestamp": "2025-09-30 22:16:30.904228", "step": 2539, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:16:30.945095", "step": 2539, "epoch": 2 }, { "type": "loss", "content": 0.00538244005292654, "timestamp": "2025-09-30 22:16:30.982099", "step": 2540, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:31.018686", "step": 2540, "epoch": 2 }, { "type": "loss", "content": 0.00587612995877862, "timestamp": "2025-09-30 22:16:31.022409", "step": 2541, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:31.062304", "step": 2541, "epoch": 2 }, { "type": "loss", "content": 0.016657264903187752, "timestamp": "2025-09-30 22:16:31.069281", "step": 2542, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:31.108102", "step": 2542, "epoch": 2 }, { "type": "loss", "content": 0.011934111826121807, "timestamp": "2025-09-30 22:16:31.115634", "step": 2543, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:31.147905", "step": 2543, "epoch": 2 }, { "type": "loss", "content": 0.005036134272813797, "timestamp": "2025-09-30 22:16:31.179686", "step": 2544, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:31.216022", "step": 2544, "epoch": 2 }, { "type": "loss", "content": 0.010882146656513214, "timestamp": "2025-09-30 22:16:31.221624", "step": 2545, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:31.253776", "step": 2545, "epoch": 2 }, { "type": "loss", "content": 0.01084787119179964, "timestamp": "2025-09-30 22:16:31.264877", "step": 2546, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:31.299924", "step": 2546, "epoch": 2 }, { "type": "loss", "content": 0.01012638583779335, "timestamp": "2025-09-30 22:16:31.311991", "step": 2547, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:31.345813", "step": 2547, "epoch": 2 }, { "type": "loss", "content": 0.006890283897519112, "timestamp": "2025-09-30 22:16:31.373863", "step": 2548, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:31.408098", "step": 2548, "epoch": 2 }, { "type": "loss", "content": 0.010032973252236843, "timestamp": "2025-09-30 22:16:31.416516", "step": 2549, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:31.449055", "step": 2549, "epoch": 2 }, { "type": "loss", "content": 0.011477479711174965, "timestamp": "2025-09-30 22:16:31.455962", "step": 2550, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:31.488570", "step": 2550, "epoch": 2 }, { "type": "loss", "content": 0.007900014519691467, "timestamp": "2025-09-30 22:16:31.495300", "step": 2551, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:31.532558", "step": 2551, "epoch": 2 }, { "type": "loss", "content": 0.015551622956991196, "timestamp": "2025-09-30 22:16:31.560961", "step": 2552, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:31.594401", "step": 2552, "epoch": 2 }, { "type": "loss", "content": 0.009452381171286106, "timestamp": "2025-09-30 22:16:31.599608", "step": 2553, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:31.644040", "step": 2553, "epoch": 2 }, { "type": "loss", "content": 0.004166701342910528, "timestamp": "2025-09-30 22:16:31.651274", "step": 2554, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:31.701101", "step": 2554, "epoch": 2 }, { "type": "loss", "content": 0.015731127932667732, "timestamp": "2025-09-30 22:16:31.709088", "step": 2555, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:31.745287", "step": 2555, "epoch": 2 }, { "type": "loss", "content": 0.004945465829223394, "timestamp": "2025-09-30 22:16:31.776352", "step": 2556, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:31.811329", "step": 2556, "epoch": 2 }, { "type": "loss", "content": 0.007961086928844452, "timestamp": "2025-09-30 22:16:31.816408", "step": 2557, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:31.853086", "step": 2557, "epoch": 2 }, { "type": "loss", "content": 0.008037255145609379, "timestamp": "2025-09-30 22:16:31.863402", "step": 2558, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:16:31.906718", "step": 2558, "epoch": 2 }, { "type": "loss", "content": 0.004936587065458298, "timestamp": "2025-09-30 22:16:31.911001", "step": 2559, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:31.949467", "step": 2559, "epoch": 2 }, { "type": "loss", "content": 0.015803245827555656, "timestamp": "2025-09-30 22:16:31.974932", "step": 2560, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:32.017608", "step": 2560, "epoch": 2 }, { "type": "loss", "content": 0.00856830459088087, "timestamp": "2025-09-30 22:16:32.022313", "step": 2561, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:32.079728", "step": 2561, "epoch": 2 }, { "type": "loss", "content": 0.02954977937042713, "timestamp": "2025-09-30 22:16:32.086470", "step": 2562, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:32.127790", "step": 2562, "epoch": 2 }, { "type": "loss", "content": 0.010794623754918575, "timestamp": "2025-09-30 22:16:32.134859", "step": 2563, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:32.167878", "step": 2563, "epoch": 2 }, { "type": "loss", "content": 0.0016124057583510876, "timestamp": "2025-09-30 22:16:32.195796", "step": 2564, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:32.233460", "step": 2564, "epoch": 2 }, { "type": "loss", "content": 0.0062332903034985065, "timestamp": "2025-09-30 22:16:32.238652", "step": 2565, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:32.272115", "step": 2565, "epoch": 2 }, { "type": "loss", "content": 0.002981910016387701, "timestamp": "2025-09-30 22:16:32.279156", "step": 2566, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:32.314210", "step": 2566, "epoch": 2 }, { "type": "loss", "content": 0.008241718634963036, "timestamp": "2025-09-30 22:16:32.321503", "step": 2567, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:16:32.362712", "step": 2567, "epoch": 2 }, { "type": "loss", "content": 0.007429973687976599, "timestamp": "2025-09-30 22:16:32.399485", "step": 2568, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:32.433979", "step": 2568, "epoch": 2 }, { "type": "loss", "content": 0.011728125624358654, "timestamp": "2025-09-30 22:16:32.439461", "step": 2569, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:32.475116", "step": 2569, "epoch": 2 }, { "type": "loss", "content": 0.008637667633593082, "timestamp": "2025-09-30 22:16:32.482225", "step": 2570, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:32.518689", "step": 2570, "epoch": 2 }, { "type": "loss", "content": 0.009170379489660263, "timestamp": "2025-09-30 22:16:32.529468", "step": 2571, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:32.570697", "step": 2571, "epoch": 2 }, { "type": "loss", "content": 0.005199885461479425, "timestamp": "2025-09-30 22:16:32.605392", "step": 2572, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:32.641528", "step": 2572, "epoch": 2 }, { "type": "loss", "content": 0.020000383257865906, "timestamp": "2025-09-30 22:16:32.650019", "step": 2573, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:32.686321", "step": 2573, "epoch": 2 }, { "type": "loss", "content": 0.008462299592792988, "timestamp": "2025-09-30 22:16:32.693378", "step": 2574, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:32.728326", "step": 2574, "epoch": 2 }, { "type": "loss", "content": 0.004085686057806015, "timestamp": "2025-09-30 22:16:32.735849", "step": 2575, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:32.772599", "step": 2575, "epoch": 2 }, { "type": "loss", "content": 0.013277919963002205, "timestamp": "2025-09-30 22:16:32.803629", "step": 2576, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:32.844427", "step": 2576, "epoch": 2 }, { "type": "loss", "content": 0.005645753815770149, "timestamp": "2025-09-30 22:16:32.850083", "step": 2577, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:32.888253", "step": 2577, "epoch": 2 }, { "type": "loss", "content": 0.0074727097526192665, "timestamp": "2025-09-30 22:16:32.900712", "step": 2578, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:32.941864", "step": 2578, "epoch": 2 }, { "type": "loss", "content": 0.007461579516530037, "timestamp": "2025-09-30 22:16:32.952756", "step": 2579, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:32.987664", "step": 2579, "epoch": 2 }, { "type": "loss", "content": 0.012325400486588478, "timestamp": "2025-09-30 22:16:33.015601", "step": 2580, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:16:33.054659", "step": 2580, "epoch": 2 }, { "type": "loss", "content": 0.00944701861590147, "timestamp": "2025-09-30 22:16:33.069755", "step": 2581, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:33.105362", "step": 2581, "epoch": 2 }, { "type": "loss", "content": 0.012234282679855824, "timestamp": "2025-09-30 22:16:33.113181", "step": 2582, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:33.149167", "step": 2582, "epoch": 2 }, { "type": "loss", "content": 0.0048253824934363365, "timestamp": "2025-09-30 22:16:33.159244", "step": 2583, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:33.194481", "step": 2583, "epoch": 2 }, { "type": "loss", "content": 0.013428851962089539, "timestamp": "2025-09-30 22:16:33.225744", "step": 2584, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:33.267188", "step": 2584, "epoch": 2 }, { "type": "loss", "content": 0.011841162107884884, "timestamp": "2025-09-30 22:16:33.272662", "step": 2585, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:16:33.317346", "step": 2585, "epoch": 2 }, { "type": "loss", "content": 0.010258554480969906, "timestamp": "2025-09-30 22:16:33.334686", "step": 2586, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:33.370633", "step": 2586, "epoch": 2 }, { "type": "loss", "content": 0.005433372687548399, "timestamp": "2025-09-30 22:16:33.382688", "step": 2587, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:33.418001", "step": 2587, "epoch": 2 }, { "type": "loss", "content": 0.017100220546126366, "timestamp": "2025-09-30 22:16:33.446271", "step": 2588, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:33.480058", "step": 2588, "epoch": 2 }, { "type": "loss", "content": 0.009693666361272335, "timestamp": "2025-09-30 22:16:33.490248", "step": 2589, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:33.524849", "step": 2589, "epoch": 2 }, { "type": "loss", "content": 0.01575091853737831, "timestamp": "2025-09-30 22:16:33.532749", "step": 2590, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:33.566359", "step": 2590, "epoch": 2 }, { "type": "loss", "content": 0.013555164448916912, "timestamp": "2025-09-30 22:16:33.573939", "step": 2591, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:33.620980", "step": 2591, "epoch": 2 }, { "type": "loss", "content": 0.013175432570278645, "timestamp": "2025-09-30 22:16:33.649725", "step": 2592, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:33.685842", "step": 2592, "epoch": 2 }, { "type": "loss", "content": 0.007773387245833874, "timestamp": "2025-09-30 22:16:33.694664", "step": 2593, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:33.730637", "step": 2593, "epoch": 2 }, { "type": "loss", "content": 0.008757934905588627, "timestamp": "2025-09-30 22:16:33.741739", "step": 2594, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:33.777947", "step": 2594, "epoch": 2 }, { "type": "loss", "content": 0.005879280623048544, "timestamp": "2025-09-30 22:16:33.788979", "step": 2595, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:33.829584", "step": 2595, "epoch": 2 }, { "type": "loss", "content": 0.007139183115214109, "timestamp": "2025-09-30 22:16:33.864212", "step": 2596, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:33.897771", "step": 2596, "epoch": 2 }, { "type": "loss", "content": 0.008979469537734985, "timestamp": "2025-09-30 22:16:33.906208", "step": 2597, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:33.949431", "step": 2597, "epoch": 2 }, { "type": "loss", "content": 0.010954844765365124, "timestamp": "2025-09-30 22:16:33.960462", "step": 2598, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:33.995326", "step": 2598, "epoch": 2 }, { "type": "loss", "content": 0.008132541552186012, "timestamp": "2025-09-30 22:16:34.006472", "step": 2599, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:34.047075", "step": 2599, "epoch": 2 }, { "type": "loss", "content": 0.008435402065515518, "timestamp": "2025-09-30 22:16:34.080552", "step": 2600, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:34.123059", "step": 2600, "epoch": 2 }, { "type": "loss", "content": 0.019805217161774635, "timestamp": "2025-09-30 22:16:34.131148", "step": 2601, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:34.174337", "step": 2601, "epoch": 2 }, { "type": "loss", "content": 0.008815511129796505, "timestamp": "2025-09-30 22:16:34.185605", "step": 2602, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:34.224405", "step": 2602, "epoch": 2 }, { "type": "loss", "content": 0.007207936607301235, "timestamp": "2025-09-30 22:16:34.238341", "step": 2603, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:34.275919", "step": 2603, "epoch": 2 }, { "type": "loss", "content": 0.017533591017127037, "timestamp": "2025-09-30 22:16:34.310609", "step": 2604, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:34.348011", "step": 2604, "epoch": 2 }, { "type": "loss", "content": 0.008848981000483036, "timestamp": "2025-09-30 22:16:34.361129", "step": 2605, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:34.394621", "step": 2605, "epoch": 2 }, { "type": "loss", "content": 0.006609481293708086, "timestamp": "2025-09-30 22:16:34.404896", "step": 2606, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:34.441784", "step": 2606, "epoch": 2 }, { "type": "loss", "content": 0.005682497750967741, "timestamp": "2025-09-30 22:16:34.455550", "step": 2607, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:34.493287", "step": 2607, "epoch": 2 }, { "type": "loss", "content": 0.010640106163918972, "timestamp": "2025-09-30 22:16:34.525326", "step": 2608, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:34.566598", "step": 2608, "epoch": 2 }, { "type": "loss", "content": 0.011010797694325447, "timestamp": "2025-09-30 22:16:34.576558", "step": 2609, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 640 ], "flops": 18984411776512 }, "timestamp": "2025-09-30 22:16:34.641726", "step": 2609, "epoch": 2 }, { "type": "loss", "content": 0.004885141737759113, "timestamp": "2025-09-30 22:16:34.663503", "step": 2610, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:34.700772", "step": 2610, "epoch": 2 }, { "type": "loss", "content": 0.0041885981336236, "timestamp": "2025-09-30 22:16:34.711163", "step": 2611, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:34.746051", "step": 2611, "epoch": 2 }, { "type": "loss", "content": 0.007177658379077911, "timestamp": "2025-09-30 22:16:34.771359", "step": 2612, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:34.819069", "step": 2612, "epoch": 2 }, { "type": "loss", "content": 0.02318485453724861, "timestamp": "2025-09-30 22:16:34.823663", "step": 2613, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:34.869241", "step": 2613, "epoch": 2 }, { "type": "loss", "content": 0.003985985182225704, "timestamp": "2025-09-30 22:16:34.876722", "step": 2614, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:34.915102", "step": 2614, "epoch": 2 }, { "type": "loss", "content": 0.012808909639716148, "timestamp": "2025-09-30 22:16:34.922383", "step": 2615, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:34.965188", "step": 2615, "epoch": 2 }, { "type": "loss", "content": 0.0033121935557574034, "timestamp": "2025-09-30 22:16:34.999702", "step": 2616, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:35.051946", "step": 2616, "epoch": 2 }, { "type": "loss", "content": 0.008870789781212807, "timestamp": "2025-09-30 22:16:35.060216", "step": 2617, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:35.099110", "step": 2617, "epoch": 2 }, { "type": "loss", "content": 0.007540592923760414, "timestamp": "2025-09-30 22:16:35.112465", "step": 2618, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:35.158616", "step": 2618, "epoch": 2 }, { "type": "loss", "content": 0.006059629376977682, "timestamp": "2025-09-30 22:16:35.172077", "step": 2619, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:35.213335", "step": 2619, "epoch": 2 }, { "type": "loss", "content": 0.007945427671074867, "timestamp": "2025-09-30 22:16:35.248076", "step": 2620, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:35.284637", "step": 2620, "epoch": 2 }, { "type": "loss", "content": 0.010467395186424255, "timestamp": "2025-09-30 22:16:35.297938", "step": 2621, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:35.336130", "step": 2621, "epoch": 2 }, { "type": "loss", "content": 0.013638225384056568, "timestamp": "2025-09-30 22:16:35.344127", "step": 2622, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:35.382241", "step": 2622, "epoch": 2 }, { "type": "loss", "content": 0.011790476739406586, "timestamp": "2025-09-30 22:16:35.389735", "step": 2623, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:35.429339", "step": 2623, "epoch": 2 }, { "type": "loss", "content": 0.010595177300274372, "timestamp": "2025-09-30 22:16:35.462690", "step": 2624, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:16:35.504914", "step": 2624, "epoch": 2 }, { "type": "loss", "content": 0.004159559495747089, "timestamp": "2025-09-30 22:16:35.520372", "step": 2625, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:35.564123", "step": 2625, "epoch": 2 }, { "type": "loss", "content": 0.0123995216563344, "timestamp": "2025-09-30 22:16:35.572155", "step": 2626, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:35.611513", "step": 2626, "epoch": 2 }, { "type": "loss", "content": 0.007546386681497097, "timestamp": "2025-09-30 22:16:35.624905", "step": 2627, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:35.664135", "step": 2627, "epoch": 2 }, { "type": "loss", "content": 0.02181871049106121, "timestamp": "2025-09-30 22:16:35.697610", "step": 2628, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:35.738846", "step": 2628, "epoch": 2 }, { "type": "loss", "content": 0.008894718252122402, "timestamp": "2025-09-30 22:16:35.746893", "step": 2629, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:35.789294", "step": 2629, "epoch": 2 }, { "type": "loss", "content": 0.011142316274344921, "timestamp": "2025-09-30 22:16:35.796547", "step": 2630, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 544 ], "flops": 16136789420416 }, "timestamp": "2025-09-30 22:16:35.847596", "step": 2630, "epoch": 2 }, { "type": "loss", "content": 0.005113878753036261, "timestamp": "2025-09-30 22:16:35.866635", "step": 2631, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:35.906628", "step": 2631, "epoch": 2 }, { "type": "loss", "content": 0.011992032639682293, "timestamp": "2025-09-30 22:16:35.934679", "step": 2632, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:35.970105", "step": 2632, "epoch": 2 }, { "type": "loss", "content": 0.006162740755826235, "timestamp": "2025-09-30 22:16:35.975656", "step": 2633, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:36.013098", "step": 2633, "epoch": 2 }, { "type": "loss", "content": 0.008410664275288582, "timestamp": "2025-09-30 22:16:36.026853", "step": 2634, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:36.065476", "step": 2634, "epoch": 2 }, { "type": "loss", "content": 0.01313704438507557, "timestamp": "2025-09-30 22:16:36.073020", "step": 2635, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:36.106648", "step": 2635, "epoch": 2 }, { "type": "loss", "content": 0.008390288800001144, "timestamp": "2025-09-30 22:16:36.135544", "step": 2636, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:36.169330", "step": 2636, "epoch": 2 }, { "type": "loss", "content": 0.007198525592684746, "timestamp": "2025-09-30 22:16:36.179917", "step": 2637, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:36.217112", "step": 2637, "epoch": 2 }, { "type": "loss", "content": 0.008321291767060757, "timestamp": "2025-09-30 22:16:36.227512", "step": 2638, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:36.266192", "step": 2638, "epoch": 2 }, { "type": "loss", "content": 0.012173098511993885, "timestamp": "2025-09-30 22:16:36.274101", "step": 2639, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:36.309567", "step": 2639, "epoch": 2 }, { "type": "loss", "content": 0.00847768783569336, "timestamp": "2025-09-30 22:16:36.340857", "step": 2640, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:36.375885", "step": 2640, "epoch": 2 }, { "type": "loss", "content": 0.012957733124494553, "timestamp": "2025-09-30 22:16:36.386338", "step": 2641, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:36.426287", "step": 2641, "epoch": 2 }, { "type": "loss", "content": 0.010323571972548962, "timestamp": "2025-09-30 22:16:36.433281", "step": 2642, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:36.467722", "step": 2642, "epoch": 2 }, { "type": "loss", "content": 0.0029670840594917536, "timestamp": "2025-09-30 22:16:36.475774", "step": 2643, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:36.512189", "step": 2643, "epoch": 2 }, { "type": "loss", "content": 0.004911190830171108, "timestamp": "2025-09-30 22:16:36.540758", "step": 2644, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:36.583105", "step": 2644, "epoch": 2 }, { "type": "loss", "content": 0.008394652977585793, "timestamp": "2025-09-30 22:16:36.591675", "step": 2645, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:16:39.149327", "step": 2645, "epoch": 2 }, { "type": "pplx", "content": 5.612307348274852, "timestamp": "2025-09-30 22:16:39.152502", "step": 2645, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:39.188349", "step": 2645, "epoch": 2 }, { "type": "loss", "content": 0.005680200643837452, "timestamp": "2025-09-30 22:16:39.195372", "step": 2646, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:39.230170", "step": 2646, "epoch": 2 }, { "type": "loss", "content": 0.010132947005331516, "timestamp": "2025-09-30 22:16:39.242355", "step": 2647, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:39.283548", "step": 2647, "epoch": 2 }, { "type": "loss", "content": 0.0077790007926523685, "timestamp": "2025-09-30 22:16:39.314936", "step": 2648, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:39.348484", "step": 2648, "epoch": 2 }, { "type": "loss", "content": 0.012910939753055573, "timestamp": "2025-09-30 22:16:39.359645", "step": 2649, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:39.393021", "step": 2649, "epoch": 2 }, { "type": "loss", "content": 0.00460228743031621, "timestamp": "2025-09-30 22:16:39.400694", "step": 2650, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:16:39.452819", "step": 2650, "epoch": 2 }, { "type": "loss", "content": 0.008654727600514889, "timestamp": "2025-09-30 22:16:39.468445", "step": 2651, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:39.512822", "step": 2651, "epoch": 2 }, { "type": "loss", "content": 0.010901113040745258, "timestamp": "2025-09-30 22:16:39.540963", "step": 2652, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:39.583514", "step": 2652, "epoch": 2 }, { "type": "loss", "content": 0.006359513849020004, "timestamp": "2025-09-30 22:16:39.596781", "step": 2653, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:16:39.646496", "step": 2653, "epoch": 2 }, { "type": "loss", "content": 0.004073025193065405, "timestamp": "2025-09-30 22:16:39.663926", "step": 2654, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:39.709793", "step": 2654, "epoch": 2 }, { "type": "loss", "content": 0.0075466991402208805, "timestamp": "2025-09-30 22:16:39.717915", "step": 2655, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:39.765792", "step": 2655, "epoch": 2 }, { "type": "loss", "content": 0.005887266248464584, "timestamp": "2025-09-30 22:16:39.796961", "step": 2656, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:39.847171", "step": 2656, "epoch": 2 }, { "type": "loss", "content": 0.005093955434858799, "timestamp": "2025-09-30 22:16:39.860311", "step": 2657, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:39.901962", "step": 2657, "epoch": 2 }, { "type": "loss", "content": 0.008431058377027512, "timestamp": "2025-09-30 22:16:39.913189", "step": 2658, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:39.950678", "step": 2658, "epoch": 2 }, { "type": "loss", "content": 0.015701932832598686, "timestamp": "2025-09-30 22:16:39.958670", "step": 2659, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:39.999911", "step": 2659, "epoch": 2 }, { "type": "loss", "content": 0.008542469702661037, "timestamp": "2025-09-30 22:16:40.034803", "step": 2660, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:40.069636", "step": 2660, "epoch": 2 }, { "type": "loss", "content": 0.00910536665469408, "timestamp": "2025-09-30 22:16:40.080488", "step": 2661, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:40.127745", "step": 2661, "epoch": 2 }, { "type": "loss", "content": 0.008821639232337475, "timestamp": "2025-09-30 22:16:40.138748", "step": 2662, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:40.186129", "step": 2662, "epoch": 2 }, { "type": "loss", "content": 0.010496851988136768, "timestamp": "2025-09-30 22:16:40.200138", "step": 2663, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:40.243407", "step": 2663, "epoch": 2 }, { "type": "loss", "content": 0.012230618856847286, "timestamp": "2025-09-30 22:16:40.274225", "step": 2664, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:40.325452", "step": 2664, "epoch": 2 }, { "type": "loss", "content": 0.010552698746323586, "timestamp": "2025-09-30 22:16:40.336039", "step": 2665, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:40.392450", "step": 2665, "epoch": 2 }, { "type": "loss", "content": 0.008086116053164005, "timestamp": "2025-09-30 22:16:40.400064", "step": 2666, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:40.437674", "step": 2666, "epoch": 2 }, { "type": "loss", "content": 0.011528640985488892, "timestamp": "2025-09-30 22:16:40.445368", "step": 2667, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:40.484549", "step": 2667, "epoch": 2 }, { "type": "loss", "content": 0.005334731191396713, "timestamp": "2025-09-30 22:16:40.515958", "step": 2668, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:40.555628", "step": 2668, "epoch": 2 }, { "type": "loss", "content": 0.015003271400928497, "timestamp": "2025-09-30 22:16:40.561228", "step": 2669, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:40.602401", "step": 2669, "epoch": 2 }, { "type": "loss", "content": 0.004248283337801695, "timestamp": "2025-09-30 22:16:40.614950", "step": 2670, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:40.658511", "step": 2670, "epoch": 2 }, { "type": "loss", "content": 0.006140046752989292, "timestamp": "2025-09-30 22:16:40.671079", "step": 2671, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:40.707833", "step": 2671, "epoch": 2 }, { "type": "loss", "content": 0.006364536006003618, "timestamp": "2025-09-30 22:16:40.739813", "step": 2672, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:40.776091", "step": 2672, "epoch": 2 }, { "type": "loss", "content": 0.008775352500379086, "timestamp": "2025-09-30 22:16:40.781275", "step": 2673, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:40.827701", "step": 2673, "epoch": 2 }, { "type": "loss", "content": 0.006313290912657976, "timestamp": "2025-09-30 22:16:40.838836", "step": 2674, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:40.882400", "step": 2674, "epoch": 2 }, { "type": "loss", "content": 0.008130986243486404, "timestamp": "2025-09-30 22:16:40.896139", "step": 2675, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:40.933361", "step": 2675, "epoch": 2 }, { "type": "loss", "content": 0.009295792318880558, "timestamp": "2025-09-30 22:16:40.964773", "step": 2676, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:41.001906", "step": 2676, "epoch": 2 }, { "type": "loss", "content": 0.015373509377241135, "timestamp": "2025-09-30 22:16:41.007655", "step": 2677, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:41.046913", "step": 2677, "epoch": 2 }, { "type": "loss", "content": 0.005237262696027756, "timestamp": "2025-09-30 22:16:41.058163", "step": 2678, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:41.098783", "step": 2678, "epoch": 2 }, { "type": "loss", "content": 0.02086806111037731, "timestamp": "2025-09-30 22:16:41.109197", "step": 2679, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:41.146735", "step": 2679, "epoch": 2 }, { "type": "loss", "content": 0.006491804029792547, "timestamp": "2025-09-30 22:16:41.179913", "step": 2680, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:41.224429", "step": 2680, "epoch": 2 }, { "type": "loss", "content": 0.008275783620774746, "timestamp": "2025-09-30 22:16:41.233398", "step": 2681, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:41.272106", "step": 2681, "epoch": 2 }, { "type": "loss", "content": 0.006674888078123331, "timestamp": "2025-09-30 22:16:41.280478", "step": 2682, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:41.319284", "step": 2682, "epoch": 2 }, { "type": "loss", "content": 0.00760753033682704, "timestamp": "2025-09-30 22:16:41.326584", "step": 2683, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:41.362576", "step": 2683, "epoch": 2 }, { "type": "loss", "content": 0.013524006120860577, "timestamp": "2025-09-30 22:16:41.392952", "step": 2684, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:41.429735", "step": 2684, "epoch": 2 }, { "type": "loss", "content": 0.011065523140132427, "timestamp": "2025-09-30 22:16:41.446644", "step": 2685, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:41.483438", "step": 2685, "epoch": 2 }, { "type": "loss", "content": 0.009008095599710941, "timestamp": "2025-09-30 22:16:41.496004", "step": 2686, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:41.538101", "step": 2686, "epoch": 2 }, { "type": "loss", "content": 0.00455522770062089, "timestamp": "2025-09-30 22:16:41.551830", "step": 2687, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:41.586169", "step": 2687, "epoch": 2 }, { "type": "loss", "content": 0.008027766831219196, "timestamp": "2025-09-30 22:16:41.614628", "step": 2688, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:41.657018", "step": 2688, "epoch": 2 }, { "type": "loss", "content": 0.008173450827598572, "timestamp": "2025-09-30 22:16:41.664118", "step": 2689, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:41.701175", "step": 2689, "epoch": 2 }, { "type": "loss", "content": 0.008348836563527584, "timestamp": "2025-09-30 22:16:41.711587", "step": 2690, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:41.746174", "step": 2690, "epoch": 2 }, { "type": "loss", "content": 0.0071425591595470905, "timestamp": "2025-09-30 22:16:41.757208", "step": 2691, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:41.788668", "step": 2691, "epoch": 2 }, { "type": "loss", "content": 0.009231861680746078, "timestamp": "2025-09-30 22:16:41.817686", "step": 2692, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-30 22:16:41.864045", "step": 2692, "epoch": 2 }, { "type": "loss", "content": 0.006422586273401976, "timestamp": "2025-09-30 22:16:41.883276", "step": 2693, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:41.918044", "step": 2693, "epoch": 2 }, { "type": "loss", "content": 0.006415804382413626, "timestamp": "2025-09-30 22:16:41.930613", "step": 2694, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:41.975285", "step": 2694, "epoch": 2 }, { "type": "loss", "content": 0.011909106746315956, "timestamp": "2025-09-30 22:16:41.989060", "step": 2695, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:42.026533", "step": 2695, "epoch": 2 }, { "type": "loss", "content": 0.004883012734353542, "timestamp": "2025-09-30 22:16:42.060805", "step": 2696, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:42.096110", "step": 2696, "epoch": 2 }, { "type": "loss", "content": 0.00944979302585125, "timestamp": "2025-09-30 22:16:42.105923", "step": 2697, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:42.147367", "step": 2697, "epoch": 2 }, { "type": "loss", "content": 0.008571948856115341, "timestamp": "2025-09-30 22:16:42.161074", "step": 2698, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:42.196069", "step": 2698, "epoch": 2 }, { "type": "loss", "content": 0.009616751223802567, "timestamp": "2025-09-30 22:16:42.208376", "step": 2699, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:42.246236", "step": 2699, "epoch": 2 }, { "type": "loss", "content": 0.0077133565209805965, "timestamp": "2025-09-30 22:16:42.279415", "step": 2700, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:42.317520", "step": 2700, "epoch": 2 }, { "type": "loss", "content": 0.018032224848866463, "timestamp": "2025-09-30 22:16:42.327538", "step": 2701, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:42.369361", "step": 2701, "epoch": 2 }, { "type": "loss", "content": 0.0063260579481720924, "timestamp": "2025-09-30 22:16:42.381698", "step": 2702, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:42.422675", "step": 2702, "epoch": 2 }, { "type": "loss", "content": 0.013565266504883766, "timestamp": "2025-09-30 22:16:42.436662", "step": 2703, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:42.473166", "step": 2703, "epoch": 2 }, { "type": "loss", "content": 0.004449347034096718, "timestamp": "2025-09-30 22:16:42.502072", "step": 2704, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:42.548352", "step": 2704, "epoch": 2 }, { "type": "loss", "content": 0.005007654894143343, "timestamp": "2025-09-30 22:16:42.561721", "step": 2705, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:42.602682", "step": 2705, "epoch": 2 }, { "type": "loss", "content": 0.007996181957423687, "timestamp": "2025-09-30 22:16:42.615244", "step": 2706, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:42.653937", "step": 2706, "epoch": 2 }, { "type": "loss", "content": 0.008707849308848381, "timestamp": "2025-09-30 22:16:42.661435", "step": 2707, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:42.700833", "step": 2707, "epoch": 2 }, { "type": "loss", "content": 0.010890424251556396, "timestamp": "2025-09-30 22:16:42.736095", "step": 2708, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:42.786650", "step": 2708, "epoch": 2 }, { "type": "loss", "content": 0.006541731301695108, "timestamp": "2025-09-30 22:16:42.797260", "step": 2709, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:42.834658", "step": 2709, "epoch": 2 }, { "type": "loss", "content": 0.006175168789923191, "timestamp": "2025-09-30 22:16:42.848477", "step": 2710, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:42.884532", "step": 2710, "epoch": 2 }, { "type": "loss", "content": 0.008247343823313713, "timestamp": "2025-09-30 22:16:42.896901", "step": 2711, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:42.930147", "step": 2711, "epoch": 2 }, { "type": "loss", "content": 0.0034400750882923603, "timestamp": "2025-09-30 22:16:42.963365", "step": 2712, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:42.999823", "step": 2712, "epoch": 2 }, { "type": "loss", "content": 0.009791983291506767, "timestamp": "2025-09-30 22:16:43.009942", "step": 2713, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:43.044919", "step": 2713, "epoch": 2 }, { "type": "loss", "content": 0.009476681239902973, "timestamp": "2025-09-30 22:16:43.057276", "step": 2714, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:43.100374", "step": 2714, "epoch": 2 }, { "type": "loss", "content": 0.010845120064914227, "timestamp": "2025-09-30 22:16:43.111488", "step": 2715, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:43.146675", "step": 2715, "epoch": 2 }, { "type": "loss", "content": 0.010124566033482552, "timestamp": "2025-09-30 22:16:43.175100", "step": 2716, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:43.211733", "step": 2716, "epoch": 2 }, { "type": "loss", "content": 0.0077261775732040405, "timestamp": "2025-09-30 22:16:43.225104", "step": 2717, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:43.261376", "step": 2717, "epoch": 2 }, { "type": "loss", "content": 0.0072675361298024654, "timestamp": "2025-09-30 22:16:43.275145", "step": 2718, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:43.307553", "step": 2718, "epoch": 2 }, { "type": "loss", "content": 0.006441792938858271, "timestamp": "2025-09-30 22:16:43.320131", "step": 2719, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:43.367749", "step": 2719, "epoch": 2 }, { "type": "loss", "content": 0.010015995241701603, "timestamp": "2025-09-30 22:16:43.401932", "step": 2720, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:43.437689", "step": 2720, "epoch": 2 }, { "type": "loss", "content": 0.005246052052825689, "timestamp": "2025-09-30 22:16:43.447698", "step": 2721, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:43.488488", "step": 2721, "epoch": 2 }, { "type": "loss", "content": 0.007146949879825115, "timestamp": "2025-09-30 22:16:43.498983", "step": 2722, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:43.530778", "step": 2722, "epoch": 2 }, { "type": "loss", "content": 0.02003488689661026, "timestamp": "2025-09-30 22:16:43.537963", "step": 2723, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:43.578736", "step": 2723, "epoch": 2 }, { "type": "loss", "content": 0.0034312934149056673, "timestamp": "2025-09-30 22:16:43.613536", "step": 2724, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:43.660497", "step": 2724, "epoch": 2 }, { "type": "loss", "content": 0.02004372887313366, "timestamp": "2025-09-30 22:16:43.670668", "step": 2725, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:43.708211", "step": 2725, "epoch": 2 }, { "type": "loss", "content": 0.010580234229564667, "timestamp": "2025-09-30 22:16:43.721930", "step": 2726, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:43.761115", "step": 2726, "epoch": 2 }, { "type": "loss", "content": 0.008645176887512207, "timestamp": "2025-09-30 22:16:43.773466", "step": 2727, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:43.809014", "step": 2727, "epoch": 2 }, { "type": "loss", "content": 0.008177714422345161, "timestamp": "2025-09-30 22:16:43.841055", "step": 2728, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:43.884528", "step": 2728, "epoch": 2 }, { "type": "loss", "content": 0.011197710409760475, "timestamp": "2025-09-30 22:16:43.890073", "step": 2729, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:43.935397", "step": 2729, "epoch": 2 }, { "type": "loss", "content": 0.009342173114418983, "timestamp": "2025-09-30 22:16:43.949069", "step": 2730, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:43.988455", "step": 2730, "epoch": 2 }, { "type": "loss", "content": 0.006484350189566612, "timestamp": "2025-09-30 22:16:44.000833", "step": 2731, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:16:44.046982", "step": 2731, "epoch": 2 }, { "type": "loss", "content": 0.00473699951544404, "timestamp": "2025-09-30 22:16:44.083426", "step": 2732, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:44.135962", "step": 2732, "epoch": 2 }, { "type": "loss", "content": 0.00753026595339179, "timestamp": "2025-09-30 22:16:44.146676", "step": 2733, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:44.181040", "step": 2733, "epoch": 2 }, { "type": "loss", "content": 0.009313041344285011, "timestamp": "2025-09-30 22:16:44.193360", "step": 2734, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:44.227806", "step": 2734, "epoch": 2 }, { "type": "loss", "content": 0.007867696695029736, "timestamp": "2025-09-30 22:16:44.234981", "step": 2735, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:44.277342", "step": 2735, "epoch": 2 }, { "type": "loss", "content": 0.007439819164574146, "timestamp": "2025-09-30 22:16:44.305482", "step": 2736, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:44.339209", "step": 2736, "epoch": 2 }, { "type": "loss", "content": 0.0045057861134409904, "timestamp": "2025-09-30 22:16:44.343976", "step": 2737, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:44.378691", "step": 2737, "epoch": 2 }, { "type": "loss", "content": 0.002972115995362401, "timestamp": "2025-09-30 22:16:44.386613", "step": 2738, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:44.419412", "step": 2738, "epoch": 2 }, { "type": "loss", "content": 0.004463300108909607, "timestamp": "2025-09-30 22:16:44.429801", "step": 2739, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:44.468385", "step": 2739, "epoch": 2 }, { "type": "loss", "content": 0.006214289925992489, "timestamp": "2025-09-30 22:16:44.500414", "step": 2740, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:44.542639", "step": 2740, "epoch": 2 }, { "type": "loss", "content": 0.004895984660834074, "timestamp": "2025-09-30 22:16:44.551585", "step": 2741, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:44.600086", "step": 2741, "epoch": 2 }, { "type": "loss", "content": 0.006714960560202599, "timestamp": "2025-09-30 22:16:44.613504", "step": 2742, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:44.653997", "step": 2742, "epoch": 2 }, { "type": "loss", "content": 0.00872341264039278, "timestamp": "2025-09-30 22:16:44.665210", "step": 2743, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:44.699864", "step": 2743, "epoch": 2 }, { "type": "loss", "content": 0.002013713587075472, "timestamp": "2025-09-30 22:16:44.727754", "step": 2744, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:44.765587", "step": 2744, "epoch": 2 }, { "type": "loss", "content": 0.014771471731364727, "timestamp": "2025-09-30 22:16:44.774245", "step": 2745, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:44.817125", "step": 2745, "epoch": 2 }, { "type": "loss", "content": 0.011147224344313145, "timestamp": "2025-09-30 22:16:44.828414", "step": 2746, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:44.863442", "step": 2746, "epoch": 2 }, { "type": "loss", "content": 0.006471259519457817, "timestamp": "2025-09-30 22:16:44.873917", "step": 2747, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:44.908340", "step": 2747, "epoch": 2 }, { "type": "loss", "content": 0.004266827832907438, "timestamp": "2025-09-30 22:16:44.936098", "step": 2748, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:44.981796", "step": 2748, "epoch": 2 }, { "type": "loss", "content": 0.004098384641110897, "timestamp": "2025-09-30 22:16:44.991855", "step": 2749, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:45.024391", "step": 2749, "epoch": 2 }, { "type": "loss", "content": 0.0010082995286211371, "timestamp": "2025-09-30 22:16:45.028924", "step": 2750, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:45.066802", "step": 2750, "epoch": 2 }, { "type": "loss", "content": 0.006948740687221289, "timestamp": "2025-09-30 22:16:45.074810", "step": 2751, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:45.111703", "step": 2751, "epoch": 2 }, { "type": "loss", "content": 0.011320582590997219, "timestamp": "2025-09-30 22:16:45.137127", "step": 2752, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:45.171102", "step": 2752, "epoch": 2 }, { "type": "loss", "content": 0.014880691654980183, "timestamp": "2025-09-30 22:16:45.183070", "step": 2753, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:45.224525", "step": 2753, "epoch": 2 }, { "type": "loss", "content": 0.0175726730376482, "timestamp": "2025-09-30 22:16:45.232156", "step": 2754, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:45.265789", "step": 2754, "epoch": 2 }, { "type": "loss", "content": 0.010096393525600433, "timestamp": "2025-09-30 22:16:45.273879", "step": 2755, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:45.306603", "step": 2755, "epoch": 2 }, { "type": "loss", "content": 0.009255954064428806, "timestamp": "2025-09-30 22:16:45.337975", "step": 2756, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:45.377046", "step": 2756, "epoch": 2 }, { "type": "loss", "content": 0.010803707875311375, "timestamp": "2025-09-30 22:16:45.385053", "step": 2757, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:45.428407", "step": 2757, "epoch": 2 }, { "type": "loss", "content": 0.00232686847448349, "timestamp": "2025-09-30 22:16:45.436196", "step": 2758, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:16:45.483115", "step": 2758, "epoch": 2 }, { "type": "loss", "content": 0.007045588456094265, "timestamp": "2025-09-30 22:16:45.500864", "step": 2759, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:45.535977", "step": 2759, "epoch": 2 }, { "type": "loss", "content": 0.014438306912779808, "timestamp": "2025-09-30 22:16:45.569350", "step": 2760, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:16:48.138642", "step": 2760, "epoch": 2 }, { "type": "pplx", "content": 5.673683070746086, "timestamp": "2025-09-30 22:16:48.141952", "step": 2760, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:48.181029", "step": 2760, "epoch": 2 }, { "type": "loss", "content": 0.010565848089754581, "timestamp": "2025-09-30 22:16:48.193572", "step": 2761, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:48.228837", "step": 2761, "epoch": 2 }, { "type": "loss", "content": 0.004915672354400158, "timestamp": "2025-09-30 22:16:48.238782", "step": 2762, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:48.280729", "step": 2762, "epoch": 2 }, { "type": "loss", "content": 0.006179687101393938, "timestamp": "2025-09-30 22:16:48.290965", "step": 2763, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:48.328515", "step": 2763, "epoch": 2 }, { "type": "loss", "content": 0.005078598856925964, "timestamp": "2025-09-30 22:16:48.357280", "step": 2764, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:48.395005", "step": 2764, "epoch": 2 }, { "type": "loss", "content": 0.009708116762340069, "timestamp": "2025-09-30 22:16:48.402867", "step": 2765, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:48.438841", "step": 2765, "epoch": 2 }, { "type": "loss", "content": 0.0027474777307361364, "timestamp": "2025-09-30 22:16:48.451094", "step": 2766, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:16:48.500859", "step": 2766, "epoch": 2 }, { "type": "loss", "content": 0.003014597110450268, "timestamp": "2025-09-30 22:16:48.517972", "step": 2767, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:48.559087", "step": 2767, "epoch": 2 }, { "type": "loss", "content": 0.003560777520760894, "timestamp": "2025-09-30 22:16:48.590302", "step": 2768, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:48.629071", "step": 2768, "epoch": 2 }, { "type": "loss", "content": 0.0011912431800737977, "timestamp": "2025-09-30 22:16:48.634146", "step": 2769, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:48.678507", "step": 2769, "epoch": 2 }, { "type": "loss", "content": 0.0011664634803310037, "timestamp": "2025-09-30 22:16:48.685533", "step": 2770, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:48.719682", "step": 2770, "epoch": 2 }, { "type": "loss", "content": 0.004645258653908968, "timestamp": "2025-09-30 22:16:48.729838", "step": 2771, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:48.762868", "step": 2771, "epoch": 2 }, { "type": "loss", "content": 0.001425820984877646, "timestamp": "2025-09-30 22:16:48.789089", "step": 2772, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:48.833344", "step": 2772, "epoch": 2 }, { "type": "loss", "content": 0.0033654433209449053, "timestamp": "2025-09-30 22:16:48.843132", "step": 2773, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:48.880928", "step": 2773, "epoch": 2 }, { "type": "loss", "content": 0.010003729723393917, "timestamp": "2025-09-30 22:16:48.894800", "step": 2774, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:48.937793", "step": 2774, "epoch": 2 }, { "type": "loss", "content": 0.0008842459646984935, "timestamp": "2025-09-30 22:16:48.949233", "step": 2775, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:48.993302", "step": 2775, "epoch": 2 }, { "type": "loss", "content": 0.0053793699480593204, "timestamp": "2025-09-30 22:16:49.029923", "step": 2776, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:49.072616", "step": 2776, "epoch": 2 }, { "type": "loss", "content": 0.006544886156916618, "timestamp": "2025-09-30 22:16:49.083218", "step": 2777, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:49.124103", "step": 2777, "epoch": 2 }, { "type": "loss", "content": 0.01111549325287342, "timestamp": "2025-09-30 22:16:49.128581", "step": 2778, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:49.161644", "step": 2778, "epoch": 2 }, { "type": "loss", "content": 0.004963804967701435, "timestamp": "2025-09-30 22:16:49.168635", "step": 2779, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:49.215963", "step": 2779, "epoch": 2 }, { "type": "loss", "content": 0.005580283235758543, "timestamp": "2025-09-30 22:16:49.250160", "step": 2780, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:16:49.292602", "step": 2780, "epoch": 2 }, { "type": "loss", "content": 0.006849126424640417, "timestamp": "2025-09-30 22:16:49.307978", "step": 2781, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:49.344740", "step": 2781, "epoch": 2 }, { "type": "loss", "content": 0.005620031151920557, "timestamp": "2025-09-30 22:16:49.355233", "step": 2782, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:49.398813", "step": 2782, "epoch": 2 }, { "type": "loss", "content": 0.015408042818307877, "timestamp": "2025-09-30 22:16:49.412561", "step": 2783, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:16:49.467169", "step": 2783, "epoch": 2 }, { "type": "loss", "content": 0.006662317551672459, "timestamp": "2025-09-30 22:16:49.505160", "step": 2784, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:49.552554", "step": 2784, "epoch": 2 }, { "type": "loss", "content": 0.003204222535714507, "timestamp": "2025-09-30 22:16:49.562519", "step": 2785, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:49.597206", "step": 2785, "epoch": 2 }, { "type": "loss", "content": 0.007308874279260635, "timestamp": "2025-09-30 22:16:49.607631", "step": 2786, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:49.647068", "step": 2786, "epoch": 2 }, { "type": "loss", "content": 0.007479496765881777, "timestamp": "2025-09-30 22:16:49.660798", "step": 2787, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:49.702343", "step": 2787, "epoch": 2 }, { "type": "loss", "content": 0.008679402060806751, "timestamp": "2025-09-30 22:16:49.733617", "step": 2788, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:49.774691", "step": 2788, "epoch": 2 }, { "type": "loss", "content": 0.007373732049018145, "timestamp": "2025-09-30 22:16:49.782813", "step": 2789, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:16:49.830177", "step": 2789, "epoch": 2 }, { "type": "loss", "content": 0.006688355002552271, "timestamp": "2025-09-30 22:16:49.845837", "step": 2790, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:49.879203", "step": 2790, "epoch": 2 }, { "type": "loss", "content": 0.006614117417484522, "timestamp": "2025-09-30 22:16:49.891803", "step": 2791, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:49.928934", "step": 2791, "epoch": 2 }, { "type": "loss", "content": 0.009344483725726604, "timestamp": "2025-09-30 22:16:49.963487", "step": 2792, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:49.996062", "step": 2792, "epoch": 2 }, { "type": "loss", "content": 0.00403931550681591, "timestamp": "2025-09-30 22:16:50.005918", "step": 2793, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:50.042762", "step": 2793, "epoch": 2 }, { "type": "loss", "content": 0.005787648260593414, "timestamp": "2025-09-30 22:16:50.050357", "step": 2794, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:50.100953", "step": 2794, "epoch": 2 }, { "type": "loss", "content": 0.006187926512211561, "timestamp": "2025-09-30 22:16:50.108506", "step": 2795, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:50.147094", "step": 2795, "epoch": 2 }, { "type": "loss", "content": 0.011713352054357529, "timestamp": "2025-09-30 22:16:50.174792", "step": 2796, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:50.207787", "step": 2796, "epoch": 2 }, { "type": "loss", "content": 0.007518185302615166, "timestamp": "2025-09-30 22:16:50.213552", "step": 2797, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:50.252277", "step": 2797, "epoch": 2 }, { "type": "loss", "content": 0.008915386162698269, "timestamp": "2025-09-30 22:16:50.263468", "step": 2798, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:50.304363", "step": 2798, "epoch": 2 }, { "type": "loss", "content": 0.008104098960757256, "timestamp": "2025-09-30 22:16:50.312359", "step": 2799, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:50.345595", "step": 2799, "epoch": 2 }, { "type": "loss", "content": 0.006924333982169628, "timestamp": "2025-09-30 22:16:50.377643", "step": 2800, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:50.410768", "step": 2800, "epoch": 2 }, { "type": "loss", "content": 0.003365806769579649, "timestamp": "2025-09-30 22:16:50.421134", "step": 2801, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:50.454439", "step": 2801, "epoch": 2 }, { "type": "loss", "content": 0.0023598482366651297, "timestamp": "2025-09-30 22:16:50.465344", "step": 2802, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:50.508284", "step": 2802, "epoch": 2 }, { "type": "loss", "content": 0.013317740522325039, "timestamp": "2025-09-30 22:16:50.520802", "step": 2803, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:50.562655", "step": 2803, "epoch": 2 }, { "type": "loss", "content": 0.008947531692683697, "timestamp": "2025-09-30 22:16:50.591680", "step": 2804, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:50.630296", "step": 2804, "epoch": 2 }, { "type": "loss", "content": 0.01369408518075943, "timestamp": "2025-09-30 22:16:50.638901", "step": 2805, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:50.673020", "step": 2805, "epoch": 2 }, { "type": "loss", "content": 0.004378673620522022, "timestamp": "2025-09-30 22:16:50.680265", "step": 2806, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:50.713023", "step": 2806, "epoch": 2 }, { "type": "loss", "content": 0.007849207147955894, "timestamp": "2025-09-30 22:16:50.723981", "step": 2807, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:50.765841", "step": 2807, "epoch": 2 }, { "type": "loss", "content": 0.0012026155600324273, "timestamp": "2025-09-30 22:16:50.793758", "step": 2808, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:50.854320", "step": 2808, "epoch": 2 }, { "type": "loss", "content": 0.006514945533126593, "timestamp": "2025-09-30 22:16:50.867724", "step": 2809, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:50.906035", "step": 2809, "epoch": 2 }, { "type": "loss", "content": 0.01418620627373457, "timestamp": "2025-09-30 22:16:50.914031", "step": 2810, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:50.960182", "step": 2810, "epoch": 2 }, { "type": "loss", "content": 0.006249403115361929, "timestamp": "2025-09-30 22:16:50.966965", "step": 2811, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:51.006078", "step": 2811, "epoch": 2 }, { "type": "loss", "content": 0.004195347428321838, "timestamp": "2025-09-30 22:16:51.040709", "step": 2812, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:51.076931", "step": 2812, "epoch": 2 }, { "type": "loss", "content": 0.005581292789429426, "timestamp": "2025-09-30 22:16:51.089483", "step": 2813, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:51.123525", "step": 2813, "epoch": 2 }, { "type": "loss", "content": 0.009607982821762562, "timestamp": "2025-09-30 22:16:51.135795", "step": 2814, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:51.173791", "step": 2814, "epoch": 2 }, { "type": "loss", "content": 0.009186118841171265, "timestamp": "2025-09-30 22:16:51.186316", "step": 2815, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:51.239936", "step": 2815, "epoch": 2 }, { "type": "loss", "content": 0.012692922726273537, "timestamp": "2025-09-30 22:16:51.274571", "step": 2816, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-30 22:16:51.328584", "step": 2816, "epoch": 2 }, { "type": "loss", "content": 0.00466757919639349, "timestamp": "2025-09-30 22:16:51.347843", "step": 2817, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:16:51.409137", "step": 2817, "epoch": 2 }, { "type": "loss", "content": 0.01156692486256361, "timestamp": "2025-09-30 22:16:51.424766", "step": 2818, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:51.480145", "step": 2818, "epoch": 2 }, { "type": "loss", "content": 0.013968187384307384, "timestamp": "2025-09-30 22:16:51.491311", "step": 2819, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:51.532773", "step": 2819, "epoch": 2 }, { "type": "loss", "content": 0.009603479877114296, "timestamp": "2025-09-30 22:16:51.567375", "step": 2820, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:51.603373", "step": 2820, "epoch": 2 }, { "type": "loss", "content": 0.007417512126266956, "timestamp": "2025-09-30 22:16:51.609078", "step": 2821, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:51.644646", "step": 2821, "epoch": 2 }, { "type": "loss", "content": 0.014802886173129082, "timestamp": "2025-09-30 22:16:51.657202", "step": 2822, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:16:51.696743", "step": 2822, "epoch": 2 }, { "type": "loss", "content": 0.0025944069493561983, "timestamp": "2025-09-30 22:16:51.712350", "step": 2823, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:51.750045", "step": 2823, "epoch": 2 }, { "type": "loss", "content": 0.00613754615187645, "timestamp": "2025-09-30 22:16:51.784776", "step": 2824, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:51.830355", "step": 2824, "epoch": 2 }, { "type": "loss", "content": 0.007381127215921879, "timestamp": "2025-09-30 22:16:51.838998", "step": 2825, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:51.884570", "step": 2825, "epoch": 2 }, { "type": "loss", "content": 0.0035370741970837116, "timestamp": "2025-09-30 22:16:51.897885", "step": 2826, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:51.934683", "step": 2826, "epoch": 2 }, { "type": "loss", "content": 0.002247849479317665, "timestamp": "2025-09-30 22:16:51.941913", "step": 2827, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:51.977097", "step": 2827, "epoch": 2 }, { "type": "loss", "content": 0.002631678944453597, "timestamp": "2025-09-30 22:16:52.008354", "step": 2828, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:52.045211", "step": 2828, "epoch": 2 }, { "type": "loss", "content": 0.005599735304713249, "timestamp": "2025-09-30 22:16:52.055560", "step": 2829, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:52.089528", "step": 2829, "epoch": 2 }, { "type": "loss", "content": 0.0031780179124325514, "timestamp": "2025-09-30 22:16:52.096676", "step": 2830, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:52.134997", "step": 2830, "epoch": 2 }, { "type": "loss", "content": 0.008361829444766045, "timestamp": "2025-09-30 22:16:52.145976", "step": 2831, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 544 ], "flops": 16136789420416 }, "timestamp": "2025-09-30 22:16:52.203847", "step": 2831, "epoch": 2 }, { "type": "loss", "content": 0.00743240537121892, "timestamp": "2025-09-30 22:16:52.243651", "step": 2832, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:52.278318", "step": 2832, "epoch": 2 }, { "type": "loss", "content": 0.007705148309469223, "timestamp": "2025-09-30 22:16:52.287065", "step": 2833, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:52.325827", "step": 2833, "epoch": 2 }, { "type": "loss", "content": 0.00037418119609355927, "timestamp": "2025-09-30 22:16:52.332946", "step": 2834, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:16:52.380514", "step": 2834, "epoch": 2 }, { "type": "loss", "content": 0.004442401695996523, "timestamp": "2025-09-30 22:16:52.396807", "step": 2835, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:52.431209", "step": 2835, "epoch": 2 }, { "type": "loss", "content": 0.007556099444627762, "timestamp": "2025-09-30 22:16:52.460105", "step": 2836, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:52.495274", "step": 2836, "epoch": 2 }, { "type": "loss", "content": 0.013456666842103004, "timestamp": "2025-09-30 22:16:52.505104", "step": 2837, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:52.539341", "step": 2837, "epoch": 2 }, { "type": "loss", "content": 0.002586257178336382, "timestamp": "2025-09-30 22:16:52.547266", "step": 2838, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:52.582757", "step": 2838, "epoch": 2 }, { "type": "loss", "content": 0.005766110494732857, "timestamp": "2025-09-30 22:16:52.590688", "step": 2839, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:52.631907", "step": 2839, "epoch": 2 }, { "type": "loss", "content": 0.008680349215865135, "timestamp": "2025-09-30 22:16:52.666132", "step": 2840, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:52.703529", "step": 2840, "epoch": 2 }, { "type": "loss", "content": 0.0029739767778664827, "timestamp": "2025-09-30 22:16:52.708715", "step": 2841, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:52.743850", "step": 2841, "epoch": 2 }, { "type": "loss", "content": 0.006776679772883654, "timestamp": "2025-09-30 22:16:52.751084", "step": 2842, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:52.790356", "step": 2842, "epoch": 2 }, { "type": "loss", "content": 0.008047245442867279, "timestamp": "2025-09-30 22:16:52.797905", "step": 2843, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:52.833168", "step": 2843, "epoch": 2 }, { "type": "loss", "content": 0.006306602619588375, "timestamp": "2025-09-30 22:16:52.865094", "step": 2844, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:52.904289", "step": 2844, "epoch": 2 }, { "type": "loss", "content": 0.006988802924752235, "timestamp": "2025-09-30 22:16:52.909335", "step": 2845, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:52.941392", "step": 2845, "epoch": 2 }, { "type": "loss", "content": 0.004913115408271551, "timestamp": "2025-09-30 22:16:52.952363", "step": 2846, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:16:52.985841", "step": 2846, "epoch": 2 }, { "type": "loss", "content": 0.009375456720590591, "timestamp": "2025-09-30 22:16:52.990031", "step": 2847, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:53.026553", "step": 2847, "epoch": 2 }, { "type": "loss", "content": 0.005251064896583557, "timestamp": "2025-09-30 22:16:53.058448", "step": 2848, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:53.097816", "step": 2848, "epoch": 2 }, { "type": "loss", "content": 0.0016738343983888626, "timestamp": "2025-09-30 22:16:53.107436", "step": 2849, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:53.148951", "step": 2849, "epoch": 2 }, { "type": "loss", "content": 0.004856899380683899, "timestamp": "2025-09-30 22:16:53.153531", "step": 2850, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:53.195365", "step": 2850, "epoch": 2 }, { "type": "loss", "content": 0.008596754632890224, "timestamp": "2025-09-30 22:16:53.205735", "step": 2851, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:53.249826", "step": 2851, "epoch": 2 }, { "type": "loss", "content": 0.004325315356254578, "timestamp": "2025-09-30 22:16:53.284568", "step": 2852, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:53.324072", "step": 2852, "epoch": 2 }, { "type": "loss", "content": 0.006976179778575897, "timestamp": "2025-09-30 22:16:53.337215", "step": 2853, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:53.378544", "step": 2853, "epoch": 2 }, { "type": "loss", "content": 0.009582379832863808, "timestamp": "2025-09-30 22:16:53.385685", "step": 2854, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:53.423852", "step": 2854, "epoch": 2 }, { "type": "loss", "content": 0.011023230850696564, "timestamp": "2025-09-30 22:16:53.431326", "step": 2855, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:53.468771", "step": 2855, "epoch": 2 }, { "type": "loss", "content": 0.011062287725508213, "timestamp": "2025-09-30 22:16:53.500658", "step": 2856, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:53.539061", "step": 2856, "epoch": 2 }, { "type": "loss", "content": 0.003815129864960909, "timestamp": "2025-09-30 22:16:53.549008", "step": 2857, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:53.586736", "step": 2857, "epoch": 2 }, { "type": "loss", "content": 0.01672511361539364, "timestamp": "2025-09-30 22:16:53.597937", "step": 2858, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:53.644944", "step": 2858, "epoch": 2 }, { "type": "loss", "content": 0.006538551300764084, "timestamp": "2025-09-30 22:16:53.658603", "step": 2859, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:53.695658", "step": 2859, "epoch": 2 }, { "type": "loss", "content": 0.011332187801599503, "timestamp": "2025-09-30 22:16:53.727648", "step": 2860, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:53.764561", "step": 2860, "epoch": 2 }, { "type": "loss", "content": 0.008668245747685432, "timestamp": "2025-09-30 22:16:53.769259", "step": 2861, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:53.807005", "step": 2861, "epoch": 2 }, { "type": "loss", "content": 0.003763214685022831, "timestamp": "2025-09-30 22:16:53.814724", "step": 2862, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:53.859753", "step": 2862, "epoch": 2 }, { "type": "loss", "content": 0.006051691249012947, "timestamp": "2025-09-30 22:16:53.873773", "step": 2863, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 592 ], "flops": 17560600598464 }, "timestamp": "2025-09-30 22:16:53.930189", "step": 2863, "epoch": 2 }, { "type": "loss", "content": 0.004667391534894705, "timestamp": "2025-09-30 22:16:53.972181", "step": 2864, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:54.012962", "step": 2864, "epoch": 2 }, { "type": "loss", "content": 0.006028117146342993, "timestamp": "2025-09-30 22:16:54.020413", "step": 2865, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:54.062104", "step": 2865, "epoch": 2 }, { "type": "loss", "content": 0.002721439115703106, "timestamp": "2025-09-30 22:16:54.075795", "step": 2866, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:54.116836", "step": 2866, "epoch": 2 }, { "type": "loss", "content": 0.004800109192728996, "timestamp": "2025-09-30 22:16:54.124498", "step": 2867, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:54.168196", "step": 2867, "epoch": 2 }, { "type": "loss", "content": 0.0033718144986778498, "timestamp": "2025-09-30 22:16:54.199282", "step": 2868, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:54.237596", "step": 2868, "epoch": 2 }, { "type": "loss", "content": 0.004756898153573275, "timestamp": "2025-09-30 22:16:54.246365", "step": 2869, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:54.286563", "step": 2869, "epoch": 2 }, { "type": "loss", "content": 0.0047842999920248985, "timestamp": "2025-09-30 22:16:54.294314", "step": 2870, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:16:54.332759", "step": 2870, "epoch": 2 }, { "type": "loss", "content": 0.00872610229998827, "timestamp": "2025-09-30 22:16:54.339988", "step": 2871, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:54.388513", "step": 2871, "epoch": 2 }, { "type": "loss", "content": 0.016180802136659622, "timestamp": "2025-09-30 22:16:54.421870", "step": 2872, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:54.457339", "step": 2872, "epoch": 2 }, { "type": "loss", "content": 0.001919434405863285, "timestamp": "2025-09-30 22:16:54.465984", "step": 2873, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:54.515398", "step": 2873, "epoch": 2 }, { "type": "loss", "content": 0.002931988565251231, "timestamp": "2025-09-30 22:16:54.527958", "step": 2874, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:54.584633", "step": 2874, "epoch": 2 }, { "type": "loss", "content": 0.008017763495445251, "timestamp": "2025-09-30 22:16:54.592343", "step": 2875, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:16:57.286752", "step": 2875, "epoch": 2 }, { "type": "pplx", "content": 5.7056885871454455, "timestamp": "2025-09-30 22:16:57.291891", "step": 2875, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:57.338353", "step": 2875, "epoch": 2 }, { "type": "loss", "content": 0.015178192406892776, "timestamp": "2025-09-30 22:16:57.368316", "step": 2876, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-30 22:16:57.410586", "step": 2876, "epoch": 2 }, { "type": "loss", "content": 0.006266669370234013, "timestamp": "2025-09-30 22:16:57.415436", "step": 2877, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:57.452630", "step": 2877, "epoch": 2 }, { "type": "loss", "content": 0.005694805644452572, "timestamp": "2025-09-30 22:16:57.460218", "step": 2878, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:57.512211", "step": 2878, "epoch": 2 }, { "type": "loss", "content": 0.005040735471993685, "timestamp": "2025-09-30 22:16:57.525576", "step": 2879, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:57.561645", "step": 2879, "epoch": 2 }, { "type": "loss", "content": 0.010561853647232056, "timestamp": "2025-09-30 22:16:57.590981", "step": 2880, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:57.639185", "step": 2880, "epoch": 2 }, { "type": "loss", "content": 0.008320540189743042, "timestamp": "2025-09-30 22:16:57.647173", "step": 2881, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:57.682937", "step": 2881, "epoch": 2 }, { "type": "loss", "content": 0.009163618087768555, "timestamp": "2025-09-30 22:16:57.691033", "step": 2882, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:16:57.728859", "step": 2882, "epoch": 2 }, { "type": "loss", "content": 0.007036111783236265, "timestamp": "2025-09-30 22:16:57.740992", "step": 2883, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:57.789547", "step": 2883, "epoch": 2 }, { "type": "loss", "content": 0.008511088788509369, "timestamp": "2025-09-30 22:16:57.823742", "step": 2884, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:57.863258", "step": 2884, "epoch": 2 }, { "type": "loss", "content": 0.0076420544646680355, "timestamp": "2025-09-30 22:16:57.868726", "step": 2885, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:57.921045", "step": 2885, "epoch": 2 }, { "type": "loss", "content": 0.0072248405776917934, "timestamp": "2025-09-30 22:16:57.929032", "step": 2886, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:57.980423", "step": 2886, "epoch": 2 }, { "type": "loss", "content": 0.00268104812130332, "timestamp": "2025-09-30 22:16:57.988332", "step": 2887, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:58.023808", "step": 2887, "epoch": 2 }, { "type": "loss", "content": 0.005942836403846741, "timestamp": "2025-09-30 22:16:58.057197", "step": 2888, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:16:58.103611", "step": 2888, "epoch": 2 }, { "type": "loss", "content": 0.006358643528074026, "timestamp": "2025-09-30 22:16:58.122152", "step": 2889, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:58.172186", "step": 2889, "epoch": 2 }, { "type": "loss", "content": 0.009358215145766735, "timestamp": "2025-09-30 22:16:58.185865", "step": 2890, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:58.233833", "step": 2890, "epoch": 2 }, { "type": "loss", "content": 0.01006359700113535, "timestamp": "2025-09-30 22:16:58.240747", "step": 2891, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:16:58.292537", "step": 2891, "epoch": 2 }, { "type": "loss", "content": 0.00435235258191824, "timestamp": "2025-09-30 22:16:58.329666", "step": 2892, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:58.375789", "step": 2892, "epoch": 2 }, { "type": "loss", "content": 0.005403323099017143, "timestamp": "2025-09-30 22:16:58.388410", "step": 2893, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:16:58.434296", "step": 2893, "epoch": 2 }, { "type": "loss", "content": 0.0034346147440373898, "timestamp": "2025-09-30 22:16:58.448048", "step": 2894, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:58.507041", "step": 2894, "epoch": 2 }, { "type": "loss", "content": 0.009157082065939903, "timestamp": "2025-09-30 22:16:58.515607", "step": 2895, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:16:58.571706", "step": 2895, "epoch": 2 }, { "type": "loss", "content": 0.007458867039531469, "timestamp": "2025-09-30 22:16:58.606492", "step": 2896, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:58.642866", "step": 2896, "epoch": 2 }, { "type": "loss", "content": 0.005464588291943073, "timestamp": "2025-09-30 22:16:58.648022", "step": 2897, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:58.710283", "step": 2897, "epoch": 2 }, { "type": "loss", "content": 0.0011974646477028728, "timestamp": "2025-09-30 22:16:58.720033", "step": 2898, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:58.761397", "step": 2898, "epoch": 2 }, { "type": "loss", "content": 0.008007212541997433, "timestamp": "2025-09-30 22:16:58.771689", "step": 2899, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:58.825698", "step": 2899, "epoch": 2 }, { "type": "loss", "content": 0.0035000897478312254, "timestamp": "2025-09-30 22:16:58.859083", "step": 2900, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:16:58.917655", "step": 2900, "epoch": 2 }, { "type": "loss", "content": 0.004464174620807171, "timestamp": "2025-09-30 22:16:58.933548", "step": 2901, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:58.971567", "step": 2901, "epoch": 2 }, { "type": "loss", "content": 0.007408112287521362, "timestamp": "2025-09-30 22:16:58.982043", "step": 2902, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:59.031842", "step": 2902, "epoch": 2 }, { "type": "loss", "content": 0.006896634586155415, "timestamp": "2025-09-30 22:16:59.039975", "step": 2903, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:59.077582", "step": 2903, "epoch": 2 }, { "type": "loss", "content": 0.0008722843485884368, "timestamp": "2025-09-30 22:16:59.108384", "step": 2904, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:16:59.150699", "step": 2904, "epoch": 2 }, { "type": "loss", "content": 0.007032486144453287, "timestamp": "2025-09-30 22:16:59.163845", "step": 2905, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:59.212817", "step": 2905, "epoch": 2 }, { "type": "loss", "content": 0.003517316887155175, "timestamp": "2025-09-30 22:16:59.219875", "step": 2906, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:59.271295", "step": 2906, "epoch": 2 }, { "type": "loss", "content": 0.013583201915025711, "timestamp": "2025-09-30 22:16:59.280244", "step": 2907, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:16:59.327283", "step": 2907, "epoch": 2 }, { "type": "loss", "content": 0.0067641520872712135, "timestamp": "2025-09-30 22:16:59.359169", "step": 2908, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:16:59.393355", "step": 2908, "epoch": 2 }, { "type": "loss", "content": 0.004752178210765123, "timestamp": "2025-09-30 22:16:59.398978", "step": 2909, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:59.452003", "step": 2909, "epoch": 2 }, { "type": "loss", "content": 0.014300595037639141, "timestamp": "2025-09-30 22:16:59.462179", "step": 2910, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:59.503956", "step": 2910, "epoch": 2 }, { "type": "loss", "content": 0.012174049392342567, "timestamp": "2025-09-30 22:16:59.508402", "step": 2911, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:16:59.559516", "step": 2911, "epoch": 2 }, { "type": "loss", "content": 0.004016537219285965, "timestamp": "2025-09-30 22:16:59.593760", "step": 2912, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:59.627343", "step": 2912, "epoch": 2 }, { "type": "loss", "content": 0.009380542673170567, "timestamp": "2025-09-30 22:16:59.635240", "step": 2913, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:16:59.668597", "step": 2913, "epoch": 2 }, { "type": "loss", "content": 0.003296236041933298, "timestamp": "2025-09-30 22:16:59.676313", "step": 2914, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:59.712346", "step": 2914, "epoch": 2 }, { "type": "loss", "content": 0.003966023214161396, "timestamp": "2025-09-30 22:16:59.722721", "step": 2915, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:16:59.767432", "step": 2915, "epoch": 2 }, { "type": "loss", "content": 0.006834966130554676, "timestamp": "2025-09-30 22:16:59.798655", "step": 2916, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:16:59.835360", "step": 2916, "epoch": 2 }, { "type": "loss", "content": 0.010554447770118713, "timestamp": "2025-09-30 22:16:59.846419", "step": 2917, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:16:59.892493", "step": 2917, "epoch": 2 }, { "type": "loss", "content": 0.008746454492211342, "timestamp": "2025-09-30 22:16:59.899471", "step": 2918, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:16:59.942401", "step": 2918, "epoch": 2 }, { "type": "loss", "content": 0.005904481280595064, "timestamp": "2025-09-30 22:16:59.951949", "step": 2919, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:16:59.993798", "step": 2919, "epoch": 2 }, { "type": "loss", "content": 0.005135437939316034, "timestamp": "2025-09-30 22:17:00.027248", "step": 2920, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:17:00.069343", "step": 2920, "epoch": 2 }, { "type": "loss", "content": 0.007645525969564915, "timestamp": "2025-09-30 22:17:00.080444", "step": 2921, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:17:00.124087", "step": 2921, "epoch": 2 }, { "type": "loss", "content": 0.005756744183599949, "timestamp": "2025-09-30 22:17:00.128638", "step": 2922, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:17:00.188589", "step": 2922, "epoch": 2 }, { "type": "loss", "content": 0.0007258382975123823, "timestamp": "2025-09-30 22:17:00.193107", "step": 2923, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:00.239867", "step": 2923, "epoch": 2 }, { "type": "loss", "content": 0.00860053114593029, "timestamp": "2025-09-30 22:17:00.268643", "step": 2924, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:00.330462", "step": 2924, "epoch": 2 }, { "type": "loss", "content": 0.0024887947365641594, "timestamp": "2025-09-30 22:17:00.339254", "step": 2925, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:00.406045", "step": 2925, "epoch": 2 }, { "type": "loss", "content": 0.009777331724762917, "timestamp": "2025-09-30 22:17:00.415168", "step": 2926, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:00.459996", "step": 2926, "epoch": 2 }, { "type": "loss", "content": 0.0025563391391187906, "timestamp": "2025-09-30 22:17:00.467594", "step": 2927, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:00.501644", "step": 2927, "epoch": 2 }, { "type": "loss", "content": 0.009383581578731537, "timestamp": "2025-09-30 22:17:00.529621", "step": 2928, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:00.564929", "step": 2928, "epoch": 2 }, { "type": "loss", "content": 0.007304985076189041, "timestamp": "2025-09-30 22:17:00.569548", "step": 2929, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:00.612284", "step": 2929, "epoch": 2 }, { "type": "loss", "content": 0.004353808239102364, "timestamp": "2025-09-30 22:17:00.623358", "step": 2930, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:00.658178", "step": 2930, "epoch": 2 }, { "type": "loss", "content": 0.008152646012604237, "timestamp": "2025-09-30 22:17:00.670293", "step": 2931, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:17:00.715731", "step": 2931, "epoch": 2 }, { "type": "loss", "content": 0.012269679456949234, "timestamp": "2025-09-30 22:17:00.750604", "step": 2932, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:00.800482", "step": 2932, "epoch": 2 }, { "type": "loss", "content": 0.004335030913352966, "timestamp": "2025-09-30 22:17:00.812364", "step": 2933, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:00.858823", "step": 2933, "epoch": 2 }, { "type": "loss", "content": 0.007889092899858952, "timestamp": "2025-09-30 22:17:00.869720", "step": 2934, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:00.913045", "step": 2934, "epoch": 2 }, { "type": "loss", "content": 0.005649187136441469, "timestamp": "2025-09-30 22:17:00.923362", "step": 2935, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:00.968089", "step": 2935, "epoch": 2 }, { "type": "loss", "content": 0.0028243919368833303, "timestamp": "2025-09-30 22:17:00.996142", "step": 2936, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:17:01.052426", "step": 2936, "epoch": 2 }, { "type": "loss", "content": 0.00842214748263359, "timestamp": "2025-09-30 22:17:01.067569", "step": 2937, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:01.122236", "step": 2937, "epoch": 2 }, { "type": "loss", "content": 0.007733777165412903, "timestamp": "2025-09-30 22:17:01.136017", "step": 2938, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:01.187347", "step": 2938, "epoch": 2 }, { "type": "loss", "content": 0.015714704990386963, "timestamp": "2025-09-30 22:17:01.197545", "step": 2939, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:01.242699", "step": 2939, "epoch": 2 }, { "type": "loss", "content": 0.011648855172097683, "timestamp": "2025-09-30 22:17:01.272540", "step": 2940, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:01.312034", "step": 2940, "epoch": 2 }, { "type": "loss", "content": 0.00478572491556406, "timestamp": "2025-09-30 22:17:01.321000", "step": 2941, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:01.359405", "step": 2941, "epoch": 2 }, { "type": "loss", "content": 0.00669105863198638, "timestamp": "2025-09-30 22:17:01.366990", "step": 2942, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:17:01.417359", "step": 2942, "epoch": 2 }, { "type": "loss", "content": 0.004550726152956486, "timestamp": "2025-09-30 22:17:01.431375", "step": 2943, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:01.472127", "step": 2943, "epoch": 2 }, { "type": "loss", "content": 0.005055215209722519, "timestamp": "2025-09-30 22:17:01.503002", "step": 2944, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:17:01.561723", "step": 2944, "epoch": 2 }, { "type": "loss", "content": 0.008799183182418346, "timestamp": "2025-09-30 22:17:01.578688", "step": 2945, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:01.625798", "step": 2945, "epoch": 2 }, { "type": "loss", "content": 0.005056922324001789, "timestamp": "2025-09-30 22:17:01.633718", "step": 2946, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:01.671199", "step": 2946, "epoch": 2 }, { "type": "loss", "content": 0.007015272043645382, "timestamp": "2025-09-30 22:17:01.685046", "step": 2947, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:01.730298", "step": 2947, "epoch": 2 }, { "type": "loss", "content": 0.008410302922129631, "timestamp": "2025-09-30 22:17:01.760127", "step": 2948, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:01.806250", "step": 2948, "epoch": 2 }, { "type": "loss", "content": 0.013081254437565804, "timestamp": "2025-09-30 22:17:01.811840", "step": 2949, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:01.853331", "step": 2949, "epoch": 2 }, { "type": "loss", "content": 0.005661000497639179, "timestamp": "2025-09-30 22:17:01.860560", "step": 2950, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:01.909311", "step": 2950, "epoch": 2 }, { "type": "loss", "content": 0.009392441250383854, "timestamp": "2025-09-30 22:17:01.921473", "step": 2951, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:01.959774", "step": 2951, "epoch": 2 }, { "type": "loss", "content": 0.002402786398306489, "timestamp": "2025-09-30 22:17:01.987503", "step": 2952, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:02.021233", "step": 2952, "epoch": 2 }, { "type": "loss", "content": 0.005488426424562931, "timestamp": "2025-09-30 22:17:02.026486", "step": 2953, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:02.060254", "step": 2953, "epoch": 2 }, { "type": "loss", "content": 0.012102757580578327, "timestamp": "2025-09-30 22:17:02.072856", "step": 2954, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:02.118740", "step": 2954, "epoch": 2 }, { "type": "loss", "content": 0.008857803419232368, "timestamp": "2025-09-30 22:17:02.129007", "step": 2955, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:02.184826", "step": 2955, "epoch": 2 }, { "type": "loss", "content": 0.003531576367095113, "timestamp": "2025-09-30 22:17:02.216484", "step": 2956, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:02.252525", "step": 2956, "epoch": 2 }, { "type": "loss", "content": 0.008055913262069225, "timestamp": "2025-09-30 22:17:02.265497", "step": 2957, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:02.300040", "step": 2957, "epoch": 2 }, { "type": "loss", "content": 0.003228359157219529, "timestamp": "2025-09-30 22:17:02.306955", "step": 2958, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:02.354609", "step": 2958, "epoch": 2 }, { "type": "loss", "content": 0.006139842327684164, "timestamp": "2025-09-30 22:17:02.364888", "step": 2959, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:02.416576", "step": 2959, "epoch": 2 }, { "type": "loss", "content": 0.002982628531754017, "timestamp": "2025-09-30 22:17:02.444204", "step": 2960, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:17:02.493361", "step": 2960, "epoch": 2 }, { "type": "loss", "content": 0.0012173228897154331, "timestamp": "2025-09-30 22:17:02.509146", "step": 2961, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:02.549634", "step": 2961, "epoch": 2 }, { "type": "loss", "content": 0.008141208440065384, "timestamp": "2025-09-30 22:17:02.556546", "step": 2962, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:02.611191", "step": 2962, "epoch": 2 }, { "type": "loss", "content": 0.0023765189107507467, "timestamp": "2025-09-30 22:17:02.623789", "step": 2963, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:02.662087", "step": 2963, "epoch": 2 }, { "type": "loss", "content": 0.010212529450654984, "timestamp": "2025-09-30 22:17:02.693422", "step": 2964, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:17:02.749492", "step": 2964, "epoch": 2 }, { "type": "loss", "content": 0.012079106643795967, "timestamp": "2025-09-30 22:17:02.762882", "step": 2965, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:17:02.821060", "step": 2965, "epoch": 2 }, { "type": "loss", "content": 0.005635153967887163, "timestamp": "2025-09-30 22:17:02.835079", "step": 2966, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:02.905200", "step": 2966, "epoch": 2 }, { "type": "loss", "content": 0.004532633814960718, "timestamp": "2025-09-30 22:17:02.917459", "step": 2967, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:02.964290", "step": 2967, "epoch": 2 }, { "type": "loss", "content": 0.01131241861730814, "timestamp": "2025-09-30 22:17:02.992834", "step": 2968, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:17:03.034600", "step": 2968, "epoch": 2 }, { "type": "loss", "content": 0.010123826563358307, "timestamp": "2025-09-30 22:17:03.047975", "step": 2969, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:03.100745", "step": 2969, "epoch": 2 }, { "type": "loss", "content": 0.005481799598783255, "timestamp": "2025-09-30 22:17:03.113334", "step": 2970, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:17:03.163057", "step": 2970, "epoch": 2 }, { "type": "loss", "content": 0.012069096788764, "timestamp": "2025-09-30 22:17:03.178639", "step": 2971, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:03.219919", "step": 2971, "epoch": 2 }, { "type": "loss", "content": 0.00575965316966176, "timestamp": "2025-09-30 22:17:03.248683", "step": 2972, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:03.291592", "step": 2972, "epoch": 2 }, { "type": "loss", "content": 0.007919232361018658, "timestamp": "2025-09-30 22:17:03.304614", "step": 2973, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:03.354851", "step": 2973, "epoch": 2 }, { "type": "loss", "content": 0.0066139488480985165, "timestamp": "2025-09-30 22:17:03.368670", "step": 2974, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:03.414453", "step": 2974, "epoch": 2 }, { "type": "loss", "content": 0.009215443395078182, "timestamp": "2025-09-30 22:17:03.424011", "step": 2975, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:03.486425", "step": 2975, "epoch": 2 }, { "type": "loss", "content": 0.007280724588781595, "timestamp": "2025-09-30 22:17:03.520643", "step": 2976, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:03.577036", "step": 2976, "epoch": 2 }, { "type": "loss", "content": 0.007378325331956148, "timestamp": "2025-09-30 22:17:03.587569", "step": 2977, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:03.638200", "step": 2977, "epoch": 2 }, { "type": "loss", "content": 0.002158799208700657, "timestamp": "2025-09-30 22:17:03.650402", "step": 2978, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:03.704487", "step": 2978, "epoch": 2 }, { "type": "loss", "content": 0.008832129649817944, "timestamp": "2025-09-30 22:17:03.715384", "step": 2979, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:03.767928", "step": 2979, "epoch": 2 }, { "type": "loss", "content": 0.009373163804411888, "timestamp": "2025-09-30 22:17:03.806012", "step": 2980, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:03.851225", "step": 2980, "epoch": 2 }, { "type": "loss", "content": 0.0014124346198514104, "timestamp": "2025-09-30 22:17:03.856676", "step": 2981, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:03.896582", "step": 2981, "epoch": 2 }, { "type": "loss", "content": 0.005799147766083479, "timestamp": "2025-09-30 22:17:03.908800", "step": 2982, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:03.955107", "step": 2982, "epoch": 2 }, { "type": "loss", "content": 0.013330061919987202, "timestamp": "2025-09-30 22:17:03.966136", "step": 2983, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:04.002303", "step": 2983, "epoch": 2 }, { "type": "loss", "content": 0.0024350215680897236, "timestamp": "2025-09-30 22:17:04.034166", "step": 2984, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:04.082567", "step": 2984, "epoch": 2 }, { "type": "loss", "content": 0.010918851010501385, "timestamp": "2025-09-30 22:17:04.095552", "step": 2985, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:04.138505", "step": 2985, "epoch": 2 }, { "type": "loss", "content": 0.005125465802848339, "timestamp": "2025-09-30 22:17:04.145930", "step": 2986, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:04.186421", "step": 2986, "epoch": 2 }, { "type": "loss", "content": 0.00508470693603158, "timestamp": "2025-09-30 22:17:04.198561", "step": 2987, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:04.233943", "step": 2987, "epoch": 2 }, { "type": "loss", "content": 0.008678026497364044, "timestamp": "2025-09-30 22:17:04.266043", "step": 2988, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:04.311350", "step": 2988, "epoch": 2 }, { "type": "loss", "content": 0.004566236399114132, "timestamp": "2025-09-30 22:17:04.316838", "step": 2989, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:04.363898", "step": 2989, "epoch": 2 }, { "type": "loss", "content": 0.0023392995353788137, "timestamp": "2025-09-30 22:17:04.375928", "step": 2990, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:17:07.096095", "step": 2990, "epoch": 2 }, { "type": "pplx", "content": 5.928878182497796, "timestamp": "2025-09-30 22:17:07.099519", "step": 2990, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:07.132110", "step": 2990, "epoch": 2 }, { "type": "loss", "content": 0.014219870790839195, "timestamp": "2025-09-30 22:17:07.143294", "step": 2991, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:07.179397", "step": 2991, "epoch": 2 }, { "type": "loss", "content": 0.002887843642383814, "timestamp": "2025-09-30 22:17:07.212539", "step": 2992, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:07.253723", "step": 2992, "epoch": 2 }, { "type": "loss", "content": 0.004083904437720776, "timestamp": "2025-09-30 22:17:07.262348", "step": 2993, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:07.310734", "step": 2993, "epoch": 2 }, { "type": "loss", "content": 0.001674058148637414, "timestamp": "2025-09-30 22:17:07.323144", "step": 2994, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:07.370714", "step": 2994, "epoch": 2 }, { "type": "loss", "content": 0.011439098045229912, "timestamp": "2025-09-30 22:17:07.381724", "step": 2995, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:07.427584", "step": 2995, "epoch": 2 }, { "type": "loss", "content": 0.0066447388380765915, "timestamp": "2025-09-30 22:17:07.460416", "step": 2996, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:17:07.514410", "step": 2996, "epoch": 2 }, { "type": "loss", "content": 0.0034933576826006174, "timestamp": "2025-09-30 22:17:07.531183", "step": 2997, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:07.571953", "step": 2997, "epoch": 2 }, { "type": "loss", "content": 0.005900776479393244, "timestamp": "2025-09-30 22:17:07.585309", "step": 2998, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:07.627921", "step": 2998, "epoch": 2 }, { "type": "loss", "content": 0.022136129438877106, "timestamp": "2025-09-30 22:17:07.641615", "step": 2999, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:07.680784", "step": 2999, "epoch": 2 }, { "type": "loss", "content": 0.009631790220737457, "timestamp": "2025-09-30 22:17:07.715002", "step": 3000, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 3000", "timestamp": "2025-09-30 22:17:12.635303", "step": 3000, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:12.677170", "step": 3000, "epoch": 2 }, { "type": "loss", "content": 0.00503592099994421, "timestamp": "2025-09-30 22:17:12.689933", "step": 3001, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:12.731922", "step": 3001, "epoch": 2 }, { "type": "loss", "content": 0.005273853428661823, "timestamp": "2025-09-30 22:17:12.744425", "step": 3002, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:12.777227", "step": 3002, "epoch": 2 }, { "type": "loss", "content": 0.005171041004359722, "timestamp": "2025-09-30 22:17:12.789445", "step": 3003, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:12.837720", "step": 3003, "epoch": 2 }, { "type": "loss", "content": 0.0026260169688612223, "timestamp": "2025-09-30 22:17:12.872298", "step": 3004, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:17:12.916880", "step": 3004, "epoch": 2 }, { "type": "loss", "content": 0.003942606970667839, "timestamp": "2025-09-30 22:17:12.932532", "step": 3005, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:12.966468", "step": 3005, "epoch": 2 }, { "type": "loss", "content": 0.011952736414968967, "timestamp": "2025-09-30 22:17:12.977621", "step": 3006, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:13.017809", "step": 3006, "epoch": 2 }, { "type": "loss", "content": 0.007050004787743092, "timestamp": "2025-09-30 22:17:13.030395", "step": 3007, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:13.068980", "step": 3007, "epoch": 2 }, { "type": "loss", "content": 0.007081000600010157, "timestamp": "2025-09-30 22:17:13.103728", "step": 3008, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:13.137718", "step": 3008, "epoch": 2 }, { "type": "loss", "content": 0.01373226847499609, "timestamp": "2025-09-30 22:17:13.148311", "step": 3009, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:13.189368", "step": 3009, "epoch": 2 }, { "type": "loss", "content": 0.008119028992950916, "timestamp": "2025-09-30 22:17:13.202012", "step": 3010, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:13.255521", "step": 3010, "epoch": 2 }, { "type": "loss", "content": 0.009641955606639385, "timestamp": "2025-09-30 22:17:13.268078", "step": 3011, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:13.305539", "step": 3011, "epoch": 2 }, { "type": "loss", "content": 0.0064568473026156425, "timestamp": "2025-09-30 22:17:13.338958", "step": 3012, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:13.388763", "step": 3012, "epoch": 2 }, { "type": "loss", "content": 0.00892971083521843, "timestamp": "2025-09-30 22:17:13.397890", "step": 3013, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:13.436294", "step": 3013, "epoch": 2 }, { "type": "loss", "content": 0.007372962776571512, "timestamp": "2025-09-30 22:17:13.450108", "step": 3014, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:17:13.499163", "step": 3014, "epoch": 2 }, { "type": "loss", "content": 0.011199056170880795, "timestamp": "2025-09-30 22:17:13.514769", "step": 3015, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:13.564847", "step": 3015, "epoch": 2 }, { "type": "loss", "content": 0.009116713888943195, "timestamp": "2025-09-30 22:17:13.599438", "step": 3016, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:17:13.650464", "step": 3016, "epoch": 2 }, { "type": "loss", "content": 0.005207949783653021, "timestamp": "2025-09-30 22:17:13.667823", "step": 3017, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:13.703555", "step": 3017, "epoch": 2 }, { "type": "loss", "content": 0.017830152064561844, "timestamp": "2025-09-30 22:17:13.716896", "step": 3018, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:13.752480", "step": 3018, "epoch": 2 }, { "type": "loss", "content": 0.0038366373628377914, "timestamp": "2025-09-30 22:17:13.763265", "step": 3019, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:13.802976", "step": 3019, "epoch": 2 }, { "type": "loss", "content": 0.008974471129477024, "timestamp": "2025-09-30 22:17:13.835760", "step": 3020, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:13.867898", "step": 3020, "epoch": 2 }, { "type": "loss", "content": 0.009324179030954838, "timestamp": "2025-09-30 22:17:13.880624", "step": 3021, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:13.928714", "step": 3021, "epoch": 2 }, { "type": "loss", "content": 0.007255516946315765, "timestamp": "2025-09-30 22:17:13.942526", "step": 3022, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:13.979826", "step": 3022, "epoch": 2 }, { "type": "loss", "content": 0.0052978722378611565, "timestamp": "2025-09-30 22:17:13.993558", "step": 3023, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:14.036132", "step": 3023, "epoch": 2 }, { "type": "loss", "content": 0.0037230406887829304, "timestamp": "2025-09-30 22:17:14.070334", "step": 3024, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:14.111207", "step": 3024, "epoch": 2 }, { "type": "loss", "content": 0.010426033288240433, "timestamp": "2025-09-30 22:17:14.121072", "step": 3025, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:14.157374", "step": 3025, "epoch": 2 }, { "type": "loss", "content": 0.005306093487888575, "timestamp": "2025-09-30 22:17:14.168555", "step": 3026, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:14.206589", "step": 3026, "epoch": 2 }, { "type": "loss", "content": 0.009547659195959568, "timestamp": "2025-09-30 22:17:14.217710", "step": 3027, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:14.262170", "step": 3027, "epoch": 2 }, { "type": "loss", "content": 0.010315535590052605, "timestamp": "2025-09-30 22:17:14.295465", "step": 3028, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:14.350705", "step": 3028, "epoch": 2 }, { "type": "loss", "content": 0.007825941778719425, "timestamp": "2025-09-30 22:17:14.355549", "step": 3029, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:14.388480", "step": 3029, "epoch": 2 }, { "type": "loss", "content": 0.004918771330267191, "timestamp": "2025-09-30 22:17:14.399693", "step": 3030, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:17:14.450381", "step": 3030, "epoch": 2 }, { "type": "loss", "content": 0.01733410358428955, "timestamp": "2025-09-30 22:17:14.467464", "step": 3031, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:14.513902", "step": 3031, "epoch": 2 }, { "type": "loss", "content": 0.0072146011516451836, "timestamp": "2025-09-30 22:17:14.547090", "step": 3032, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:14.581736", "step": 3032, "epoch": 2 }, { "type": "loss", "content": 0.007138307671993971, "timestamp": "2025-09-30 22:17:14.594363", "step": 3033, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:17:14.652478", "step": 3033, "epoch": 2 }, { "type": "loss", "content": 0.005867151077836752, "timestamp": "2025-09-30 22:17:14.668181", "step": 3034, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 544 ], "flops": 16136789420416 }, "timestamp": "2025-09-30 22:17:14.718086", "step": 3034, "epoch": 2 }, { "type": "loss", "content": 0.0055921124294400215, "timestamp": "2025-09-30 22:17:14.737164", "step": 3035, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:17:14.794496", "step": 3035, "epoch": 2 }, { "type": "loss", "content": 0.006952265743166208, "timestamp": "2025-09-30 22:17:14.829284", "step": 3036, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:14.861744", "step": 3036, "epoch": 2 }, { "type": "loss", "content": 0.026587001979351044, "timestamp": "2025-09-30 22:17:14.871655", "step": 3037, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:14.904413", "step": 3037, "epoch": 2 }, { "type": "loss", "content": 0.00618098396807909, "timestamp": "2025-09-30 22:17:14.916735", "step": 3038, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:14.956335", "step": 3038, "epoch": 2 }, { "type": "loss", "content": 0.005510224495083094, "timestamp": "2025-09-30 22:17:14.970109", "step": 3039, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:15.008843", "step": 3039, "epoch": 2 }, { "type": "loss", "content": 0.007888545282185078, "timestamp": "2025-09-30 22:17:15.043521", "step": 3040, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:17:15.085047", "step": 3040, "epoch": 2 }, { "type": "loss", "content": 0.00570820365101099, "timestamp": "2025-09-30 22:17:15.100470", "step": 3041, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:15.154341", "step": 3041, "epoch": 2 }, { "type": "loss", "content": 0.004262340720742941, "timestamp": "2025-09-30 22:17:15.168069", "step": 3042, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:15.218947", "step": 3042, "epoch": 2 }, { "type": "loss", "content": 0.004007878713309765, "timestamp": "2025-09-30 22:17:15.230104", "step": 3043, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:15.267381", "step": 3043, "epoch": 2 }, { "type": "loss", "content": 0.0045262956991791725, "timestamp": "2025-09-30 22:17:15.300735", "step": 3044, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:17:15.351259", "step": 3044, "epoch": 2 }, { "type": "loss", "content": 0.009178610518574715, "timestamp": "2025-09-30 22:17:15.367106", "step": 3045, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:15.420548", "step": 3045, "epoch": 2 }, { "type": "loss", "content": 0.013096172362565994, "timestamp": "2025-09-30 22:17:15.436932", "step": 3046, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:15.478593", "step": 3046, "epoch": 2 }, { "type": "loss", "content": 0.007595235947519541, "timestamp": "2025-09-30 22:17:15.491999", "step": 3047, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:17:15.541654", "step": 3047, "epoch": 2 }, { "type": "loss", "content": 0.0028729906771332026, "timestamp": "2025-09-30 22:17:15.578709", "step": 3048, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:15.619597", "step": 3048, "epoch": 2 }, { "type": "loss", "content": 0.008607570081949234, "timestamp": "2025-09-30 22:17:15.632706", "step": 3049, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:15.680521", "step": 3049, "epoch": 2 }, { "type": "loss", "content": 0.004809021484106779, "timestamp": "2025-09-30 22:17:15.693132", "step": 3050, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:17:15.752766", "step": 3050, "epoch": 2 }, { "type": "loss", "content": 0.008124749176204205, "timestamp": "2025-09-30 22:17:15.770135", "step": 3051, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:17:15.830341", "step": 3051, "epoch": 2 }, { "type": "loss", "content": 0.006094157230108976, "timestamp": "2025-09-30 22:17:15.865191", "step": 3052, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:15.905289", "step": 3052, "epoch": 2 }, { "type": "loss", "content": 0.008468708023428917, "timestamp": "2025-09-30 22:17:15.915435", "step": 3053, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:15.966213", "step": 3053, "epoch": 2 }, { "type": "loss", "content": 0.006911612581461668, "timestamp": "2025-09-30 22:17:15.973124", "step": 3054, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:16.019021", "step": 3054, "epoch": 2 }, { "type": "loss", "content": 0.008364547975361347, "timestamp": "2025-09-30 22:17:16.027050", "step": 3055, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:16.076435", "step": 3055, "epoch": 2 }, { "type": "loss", "content": 0.010809467174112797, "timestamp": "2025-09-30 22:17:16.105159", "step": 3056, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:16.151796", "step": 3056, "epoch": 2 }, { "type": "loss", "content": 0.005176931619644165, "timestamp": "2025-09-30 22:17:16.157398", "step": 3057, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:16.207281", "step": 3057, "epoch": 2 }, { "type": "loss", "content": 0.006025994196534157, "timestamp": "2025-09-30 22:17:16.215231", "step": 3058, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:16.270242", "step": 3058, "epoch": 2 }, { "type": "loss", "content": 0.007871462032198906, "timestamp": "2025-09-30 22:17:16.277671", "step": 3059, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:16.328676", "step": 3059, "epoch": 2 }, { "type": "loss", "content": 0.005231105722486973, "timestamp": "2025-09-30 22:17:16.359756", "step": 3060, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:16.408175", "step": 3060, "epoch": 2 }, { "type": "loss", "content": 0.007062830962240696, "timestamp": "2025-09-30 22:17:16.412934", "step": 3061, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:16.461743", "step": 3061, "epoch": 2 }, { "type": "loss", "content": 0.004921720828860998, "timestamp": "2025-09-30 22:17:16.472250", "step": 3062, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:16.518336", "step": 3062, "epoch": 2 }, { "type": "loss", "content": 0.005504702217876911, "timestamp": "2025-09-30 22:17:16.530847", "step": 3063, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:16.577868", "step": 3063, "epoch": 2 }, { "type": "loss", "content": 0.01199677586555481, "timestamp": "2025-09-30 22:17:16.608408", "step": 3064, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:16.662637", "step": 3064, "epoch": 2 }, { "type": "loss", "content": 0.0182404275983572, "timestamp": "2025-09-30 22:17:16.671114", "step": 3065, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:16.726724", "step": 3065, "epoch": 2 }, { "type": "loss", "content": 0.0021776268258690834, "timestamp": "2025-09-30 22:17:16.737079", "step": 3066, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:16.776425", "step": 3066, "epoch": 2 }, { "type": "loss", "content": 0.003288572421297431, "timestamp": "2025-09-30 22:17:16.783756", "step": 3067, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:16.831347", "step": 3067, "epoch": 2 }, { "type": "loss", "content": 0.008751352317631245, "timestamp": "2025-09-30 22:17:16.859204", "step": 3068, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:17:16.921116", "step": 3068, "epoch": 2 }, { "type": "loss", "content": 0.002015740144997835, "timestamp": "2025-09-30 22:17:16.924352", "step": 3069, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:16.962523", "step": 3069, "epoch": 2 }, { "type": "loss", "content": 0.006834395695477724, "timestamp": "2025-09-30 22:17:16.970227", "step": 3070, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:17.009631", "step": 3070, "epoch": 2 }, { "type": "loss", "content": 0.005788995418697596, "timestamp": "2025-09-30 22:17:17.021895", "step": 3071, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:17.061097", "step": 3071, "epoch": 2 }, { "type": "loss", "content": 0.010872102342545986, "timestamp": "2025-09-30 22:17:17.089880", "step": 3072, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:17.142862", "step": 3072, "epoch": 2 }, { "type": "loss", "content": 0.0029374524019658566, "timestamp": "2025-09-30 22:17:17.148127", "step": 3073, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:17.202343", "step": 3073, "epoch": 2 }, { "type": "loss", "content": 0.0030281918589025736, "timestamp": "2025-09-30 22:17:17.214664", "step": 3074, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:17.269285", "step": 3074, "epoch": 2 }, { "type": "loss", "content": 0.005195050500333309, "timestamp": "2025-09-30 22:17:17.282642", "step": 3075, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:17.321975", "step": 3075, "epoch": 2 }, { "type": "loss", "content": 0.008502720855176449, "timestamp": "2025-09-30 22:17:17.349858", "step": 3076, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:17.391182", "step": 3076, "epoch": 2 }, { "type": "loss", "content": 0.004310964606702328, "timestamp": "2025-09-30 22:17:17.396917", "step": 3077, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:17.436861", "step": 3077, "epoch": 2 }, { "type": "loss", "content": 0.004775337874889374, "timestamp": "2025-09-30 22:17:17.444796", "step": 3078, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:17.490689", "step": 3078, "epoch": 2 }, { "type": "loss", "content": 0.012922474183142185, "timestamp": "2025-09-30 22:17:17.501260", "step": 3079, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:17.559947", "step": 3079, "epoch": 2 }, { "type": "loss", "content": 0.003970756195485592, "timestamp": "2025-09-30 22:17:17.590605", "step": 3080, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:17.633165", "step": 3080, "epoch": 2 }, { "type": "loss", "content": 0.020425280556082726, "timestamp": "2025-09-30 22:17:17.638063", "step": 3081, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:17.678431", "step": 3081, "epoch": 2 }, { "type": "loss", "content": 0.004362175241112709, "timestamp": "2025-09-30 22:17:17.685666", "step": 3082, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:17.722235", "step": 3082, "epoch": 2 }, { "type": "loss", "content": 0.007760615553706884, "timestamp": "2025-09-30 22:17:17.729463", "step": 3083, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:17.785058", "step": 3083, "epoch": 2 }, { "type": "loss", "content": 0.0030244977679103613, "timestamp": "2025-09-30 22:17:17.818428", "step": 3084, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:17.863019", "step": 3084, "epoch": 2 }, { "type": "loss", "content": 0.0019816835410892963, "timestamp": "2025-09-30 22:17:17.870777", "step": 3085, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:17.932984", "step": 3085, "epoch": 2 }, { "type": "loss", "content": 0.008955265395343304, "timestamp": "2025-09-30 22:17:17.940671", "step": 3086, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:17.988177", "step": 3086, "epoch": 2 }, { "type": "loss", "content": 0.006537090055644512, "timestamp": "2025-09-30 22:17:17.995765", "step": 3087, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:18.043815", "step": 3087, "epoch": 2 }, { "type": "loss", "content": 0.0045349858701229095, "timestamp": "2025-09-30 22:17:18.072638", "step": 3088, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:18.109857", "step": 3088, "epoch": 2 }, { "type": "loss", "content": 0.0019489992409944534, "timestamp": "2025-09-30 22:17:18.114704", "step": 3089, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:18.162190", "step": 3089, "epoch": 2 }, { "type": "loss", "content": 0.007369097787886858, "timestamp": "2025-09-30 22:17:18.170125", "step": 3090, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:18.208164", "step": 3090, "epoch": 2 }, { "type": "loss", "content": 0.007728835102170706, "timestamp": "2025-09-30 22:17:18.219190", "step": 3091, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:18.261980", "step": 3091, "epoch": 2 }, { "type": "loss", "content": 0.006376664619892836, "timestamp": "2025-09-30 22:17:18.293163", "step": 3092, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:18.340763", "step": 3092, "epoch": 2 }, { "type": "loss", "content": 0.011707457713782787, "timestamp": "2025-09-30 22:17:18.349447", "step": 3093, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:18.391062", "step": 3093, "epoch": 2 }, { "type": "loss", "content": 0.0010542640229687095, "timestamp": "2025-09-30 22:17:18.398847", "step": 3094, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:18.446450", "step": 3094, "epoch": 2 }, { "type": "loss", "content": 0.006500933784991503, "timestamp": "2025-09-30 22:17:18.454134", "step": 3095, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:18.495974", "step": 3095, "epoch": 2 }, { "type": "loss", "content": 0.004050260875374079, "timestamp": "2025-09-30 22:17:18.524724", "step": 3096, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:18.561262", "step": 3096, "epoch": 2 }, { "type": "loss", "content": 0.009400052949786186, "timestamp": "2025-09-30 22:17:18.569814", "step": 3097, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:18.609096", "step": 3097, "epoch": 2 }, { "type": "loss", "content": 0.003274328075349331, "timestamp": "2025-09-30 22:17:18.619339", "step": 3098, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:18.654643", "step": 3098, "epoch": 2 }, { "type": "loss", "content": 0.0031144127715379, "timestamp": "2025-09-30 22:17:18.664875", "step": 3099, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:18.728374", "step": 3099, "epoch": 2 }, { "type": "loss", "content": 0.001828193198889494, "timestamp": "2025-09-30 22:17:18.760104", "step": 3100, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:18.801727", "step": 3100, "epoch": 2 }, { "type": "loss", "content": 0.004855462349951267, "timestamp": "2025-09-30 22:17:18.809767", "step": 3101, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:18.856681", "step": 3101, "epoch": 2 }, { "type": "loss", "content": 0.007850201800465584, "timestamp": "2025-09-30 22:17:18.867744", "step": 3102, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:18.914726", "step": 3102, "epoch": 2 }, { "type": "loss", "content": 0.0016034794971346855, "timestamp": "2025-09-30 22:17:18.922722", "step": 3103, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:18.957495", "step": 3103, "epoch": 2 }, { "type": "loss", "content": 0.050340570509433746, "timestamp": "2025-09-30 22:17:18.989455", "step": 3104, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:19.023702", "step": 3104, "epoch": 2 }, { "type": "loss", "content": 0.006957797799259424, "timestamp": "2025-09-30 22:17:19.032287", "step": 3105, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:17:21.694363", "step": 3105, "epoch": 2 }, { "type": "pplx", "content": 5.915286211797802, "timestamp": "2025-09-30 22:17:21.697768", "step": 3105, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:21.738087", "step": 3105, "epoch": 2 }, { "type": "loss", "content": 0.014353587292134762, "timestamp": "2025-09-30 22:17:21.749638", "step": 3106, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:21.794936", "step": 3106, "epoch": 2 }, { "type": "loss", "content": 0.0015971955144777894, "timestamp": "2025-09-30 22:17:21.807534", "step": 3107, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:21.860590", "step": 3107, "epoch": 2 }, { "type": "loss", "content": 0.0026139733381569386, "timestamp": "2025-09-30 22:17:21.892341", "step": 3108, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:21.928180", "step": 3108, "epoch": 2 }, { "type": "loss", "content": 0.003356874454766512, "timestamp": "2025-09-30 22:17:21.936980", "step": 3109, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:21.974838", "step": 3109, "epoch": 2 }, { "type": "loss", "content": 0.005268130451440811, "timestamp": "2025-09-30 22:17:21.985946", "step": 3110, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:22.026853", "step": 3110, "epoch": 2 }, { "type": "loss", "content": 0.005021201446652412, "timestamp": "2025-09-30 22:17:22.039073", "step": 3111, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:22.093171", "step": 3111, "epoch": 2 }, { "type": "loss", "content": 0.0069105904549360275, "timestamp": "2025-09-30 22:17:22.121972", "step": 3112, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:22.163915", "step": 3112, "epoch": 2 }, { "type": "loss", "content": 0.011179746128618717, "timestamp": "2025-09-30 22:17:22.181149", "step": 3113, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:22.228091", "step": 3113, "epoch": 2 }, { "type": "loss", "content": 0.0006776591180823743, "timestamp": "2025-09-30 22:17:22.239160", "step": 3114, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:22.277261", "step": 3114, "epoch": 2 }, { "type": "loss", "content": 0.006834322586655617, "timestamp": "2025-09-30 22:17:22.289877", "step": 3115, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:22.334046", "step": 3115, "epoch": 2 }, { "type": "loss", "content": 0.012681066989898682, "timestamp": "2025-09-30 22:17:22.368288", "step": 3116, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:22.406542", "step": 3116, "epoch": 2 }, { "type": "loss", "content": 0.0020665479823946953, "timestamp": "2025-09-30 22:17:22.414412", "step": 3117, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:22.463655", "step": 3117, "epoch": 2 }, { "type": "loss", "content": 0.0028975976165384054, "timestamp": "2025-09-30 22:17:22.476255", "step": 3118, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:22.524353", "step": 3118, "epoch": 2 }, { "type": "loss", "content": 0.0044283573515713215, "timestamp": "2025-09-30 22:17:22.536688", "step": 3119, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:22.587305", "step": 3119, "epoch": 2 }, { "type": "loss", "content": 0.016512639820575714, "timestamp": "2025-09-30 22:17:22.620759", "step": 3120, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:22.657410", "step": 3120, "epoch": 2 }, { "type": "loss", "content": 0.007381323724985123, "timestamp": "2025-09-30 22:17:22.670017", "step": 3121, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:22.704965", "step": 3121, "epoch": 2 }, { "type": "loss", "content": 0.0029324996285140514, "timestamp": "2025-09-30 22:17:22.715970", "step": 3122, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:22.757110", "step": 3122, "epoch": 2 }, { "type": "loss", "content": 0.01893054135143757, "timestamp": "2025-09-30 22:17:22.768242", "step": 3123, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:22.811161", "step": 3123, "epoch": 2 }, { "type": "loss", "content": 0.006030916702002287, "timestamp": "2025-09-30 22:17:22.844359", "step": 3124, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:22.878923", "step": 3124, "epoch": 2 }, { "type": "loss", "content": 0.0008163400925695896, "timestamp": "2025-09-30 22:17:22.887818", "step": 3125, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:22.940676", "step": 3125, "epoch": 2 }, { "type": "loss", "content": 0.0037008821964263916, "timestamp": "2025-09-30 22:17:22.951828", "step": 3126, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:22.993521", "step": 3126, "epoch": 2 }, { "type": "loss", "content": 0.0012607695534825325, "timestamp": "2025-09-30 22:17:23.006085", "step": 3127, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:23.040621", "step": 3127, "epoch": 2 }, { "type": "loss", "content": 0.002586707007139921, "timestamp": "2025-09-30 22:17:23.072745", "step": 3128, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:23.119279", "step": 3128, "epoch": 2 }, { "type": "loss", "content": 0.005804563872516155, "timestamp": "2025-09-30 22:17:23.129848", "step": 3129, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:23.165734", "step": 3129, "epoch": 2 }, { "type": "loss", "content": 0.004052955657243729, "timestamp": "2025-09-30 22:17:23.176208", "step": 3130, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:23.219929", "step": 3130, "epoch": 2 }, { "type": "loss", "content": 0.013816587626934052, "timestamp": "2025-09-30 22:17:23.227494", "step": 3131, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:23.262091", "step": 3131, "epoch": 2 }, { "type": "loss", "content": 0.000520117289852351, "timestamp": "2025-09-30 22:17:23.295307", "step": 3132, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:23.331290", "step": 3132, "epoch": 2 }, { "type": "loss", "content": 0.006719049997627735, "timestamp": "2025-09-30 22:17:23.342063", "step": 3133, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:23.393863", "step": 3133, "epoch": 2 }, { "type": "loss", "content": 0.0056020780466496944, "timestamp": "2025-09-30 22:17:23.402006", "step": 3134, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:23.441780", "step": 3134, "epoch": 2 }, { "type": "loss", "content": 0.008619586937129498, "timestamp": "2025-09-30 22:17:23.454105", "step": 3135, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:23.502541", "step": 3135, "epoch": 2 }, { "type": "loss", "content": 0.0013584374682977796, "timestamp": "2025-09-30 22:17:23.531297", "step": 3136, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:23.566069", "step": 3136, "epoch": 2 }, { "type": "loss", "content": 0.002616981277242303, "timestamp": "2025-09-30 22:17:23.576477", "step": 3137, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:23.614507", "step": 3137, "epoch": 2 }, { "type": "loss", "content": 0.0022322519216686487, "timestamp": "2025-09-30 22:17:23.626751", "step": 3138, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:23.668329", "step": 3138, "epoch": 2 }, { "type": "loss", "content": 0.013231469318270683, "timestamp": "2025-09-30 22:17:23.680715", "step": 3139, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:23.717910", "step": 3139, "epoch": 2 }, { "type": "loss", "content": 0.008670350536704063, "timestamp": "2025-09-30 22:17:23.751378", "step": 3140, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:23.787895", "step": 3140, "epoch": 2 }, { "type": "loss", "content": 0.0005706910160370171, "timestamp": "2025-09-30 22:17:23.800483", "step": 3141, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:23.845725", "step": 3141, "epoch": 2 }, { "type": "loss", "content": 0.00035360330366529524, "timestamp": "2025-09-30 22:17:23.858316", "step": 3142, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:23.908820", "step": 3142, "epoch": 2 }, { "type": "loss", "content": 0.0022373131942003965, "timestamp": "2025-09-30 22:17:23.919925", "step": 3143, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:23.955211", "step": 3143, "epoch": 2 }, { "type": "loss", "content": 0.001455026096664369, "timestamp": "2025-09-30 22:17:23.988421", "step": 3144, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:24.024154", "step": 3144, "epoch": 2 }, { "type": "loss", "content": 0.0032580397091805935, "timestamp": "2025-09-30 22:17:24.036804", "step": 3145, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:24.087963", "step": 3145, "epoch": 2 }, { "type": "loss", "content": 0.02811695635318756, "timestamp": "2025-09-30 22:17:24.101681", "step": 3146, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:24.152152", "step": 3146, "epoch": 2 }, { "type": "loss", "content": 0.002767069498077035, "timestamp": "2025-09-30 22:17:24.164697", "step": 3147, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:24.218777", "step": 3147, "epoch": 2 }, { "type": "loss", "content": 0.0006574054714292288, "timestamp": "2025-09-30 22:17:24.252182", "step": 3148, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:24.299821", "step": 3148, "epoch": 2 }, { "type": "loss", "content": 0.007816782221198082, "timestamp": "2025-09-30 22:17:24.308503", "step": 3149, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:24.355695", "step": 3149, "epoch": 2 }, { "type": "loss", "content": 0.004145421087741852, "timestamp": "2025-09-30 22:17:24.368298", "step": 3150, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:24.412544", "step": 3150, "epoch": 2 }, { "type": "loss", "content": 0.0063446518033742905, "timestamp": "2025-09-30 22:17:24.425116", "step": 3151, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:24.484197", "step": 3151, "epoch": 2 }, { "type": "loss", "content": 0.013051629066467285, "timestamp": "2025-09-30 22:17:24.517442", "step": 3152, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:24.557427", "step": 3152, "epoch": 2 }, { "type": "loss", "content": 0.004559283144772053, "timestamp": "2025-09-30 22:17:24.562797", "step": 3153, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:24.605492", "step": 3153, "epoch": 2 }, { "type": "loss", "content": 0.021444272249937057, "timestamp": "2025-09-30 22:17:24.617776", "step": 3154, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:24.662113", "step": 3154, "epoch": 2 }, { "type": "loss", "content": 0.008376072160899639, "timestamp": "2025-09-30 22:17:24.674610", "step": 3155, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:24.728068", "step": 3155, "epoch": 2 }, { "type": "loss", "content": 0.012633971869945526, "timestamp": "2025-09-30 22:17:24.759338", "step": 3156, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:24.793521", "step": 3156, "epoch": 2 }, { "type": "loss", "content": 0.007506215944886208, "timestamp": "2025-09-30 22:17:24.801387", "step": 3157, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:24.861407", "step": 3157, "epoch": 2 }, { "type": "loss", "content": 0.014985278248786926, "timestamp": "2025-09-30 22:17:24.871691", "step": 3158, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:24.904779", "step": 3158, "epoch": 2 }, { "type": "loss", "content": 0.00818850938230753, "timestamp": "2025-09-30 22:17:24.911849", "step": 3159, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:24.949071", "step": 3159, "epoch": 2 }, { "type": "loss", "content": 0.0067369369789958, "timestamp": "2025-09-30 22:17:24.977423", "step": 3160, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:25.012730", "step": 3160, "epoch": 2 }, { "type": "loss", "content": 0.008044580928981304, "timestamp": "2025-09-30 22:17:25.018023", "step": 3161, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:25.067848", "step": 3161, "epoch": 2 }, { "type": "loss", "content": 0.0038618387188762426, "timestamp": "2025-09-30 22:17:25.075516", "step": 3162, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:25.110873", "step": 3162, "epoch": 2 }, { "type": "loss", "content": 0.005019427742809057, "timestamp": "2025-09-30 22:17:25.118541", "step": 3163, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:25.152865", "step": 3163, "epoch": 2 }, { "type": "loss", "content": 0.0045925346203148365, "timestamp": "2025-09-30 22:17:25.181335", "step": 3164, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:25.223982", "step": 3164, "epoch": 2 }, { "type": "loss", "content": 0.016353843733668327, "timestamp": "2025-09-30 22:17:25.232460", "step": 3165, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:25.271158", "step": 3165, "epoch": 2 }, { "type": "loss", "content": 0.012923413887619972, "timestamp": "2025-09-30 22:17:25.281450", "step": 3166, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:25.329213", "step": 3166, "epoch": 2 }, { "type": "loss", "content": 0.0025689145550131798, "timestamp": "2025-09-30 22:17:25.337431", "step": 3167, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:25.383799", "step": 3167, "epoch": 2 }, { "type": "loss", "content": 0.009323082864284515, "timestamp": "2025-09-30 22:17:25.412374", "step": 3168, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:25.467545", "step": 3168, "epoch": 2 }, { "type": "loss", "content": 0.010412830859422684, "timestamp": "2025-09-30 22:17:25.472682", "step": 3169, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:17:25.507212", "step": 3169, "epoch": 2 }, { "type": "loss", "content": 0.007618281990289688, "timestamp": "2025-09-30 22:17:25.511411", "step": 3170, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:25.545617", "step": 3170, "epoch": 2 }, { "type": "loss", "content": 0.0035928383003920317, "timestamp": "2025-09-30 22:17:25.555996", "step": 3171, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:25.590404", "step": 3171, "epoch": 2 }, { "type": "loss", "content": 0.01205290574580431, "timestamp": "2025-09-30 22:17:25.623582", "step": 3172, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:25.675880", "step": 3172, "epoch": 2 }, { "type": "loss", "content": 0.00397237204015255, "timestamp": "2025-09-30 22:17:25.688931", "step": 3173, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:25.733974", "step": 3173, "epoch": 2 }, { "type": "loss", "content": 0.00367568526417017, "timestamp": "2025-09-30 22:17:25.741364", "step": 3174, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:25.799702", "step": 3174, "epoch": 2 }, { "type": "loss", "content": 0.0020569413900375366, "timestamp": "2025-09-30 22:17:25.811930", "step": 3175, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:25.855138", "step": 3175, "epoch": 2 }, { "type": "loss", "content": 0.008882638067007065, "timestamp": "2025-09-30 22:17:25.889345", "step": 3176, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:25.926983", "step": 3176, "epoch": 2 }, { "type": "loss", "content": 0.007198006846010685, "timestamp": "2025-09-30 22:17:25.935640", "step": 3177, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:25.982980", "step": 3177, "epoch": 2 }, { "type": "loss", "content": 0.006660681217908859, "timestamp": "2025-09-30 22:17:25.996379", "step": 3178, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:26.037650", "step": 3178, "epoch": 2 }, { "type": "loss", "content": 0.047912854701280594, "timestamp": "2025-09-30 22:17:26.050013", "step": 3179, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:26.088712", "step": 3179, "epoch": 2 }, { "type": "loss", "content": 0.004959320183843374, "timestamp": "2025-09-30 22:17:26.120478", "step": 3180, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:26.165091", "step": 3180, "epoch": 2 }, { "type": "loss", "content": 0.006877145264297724, "timestamp": "2025-09-30 22:17:26.172951", "step": 3181, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:26.214096", "step": 3181, "epoch": 2 }, { "type": "loss", "content": 0.002868048846721649, "timestamp": "2025-09-30 22:17:26.226652", "step": 3182, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:26.274881", "step": 3182, "epoch": 2 }, { "type": "loss", "content": 0.0076978690922260284, "timestamp": "2025-09-30 22:17:26.287070", "step": 3183, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:26.328298", "step": 3183, "epoch": 2 }, { "type": "loss", "content": 0.012668581679463387, "timestamp": "2025-09-30 22:17:26.361439", "step": 3184, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:26.396502", "step": 3184, "epoch": 2 }, { "type": "loss", "content": 0.010230972431600094, "timestamp": "2025-09-30 22:17:26.404439", "step": 3185, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:26.439228", "step": 3185, "epoch": 2 }, { "type": "loss", "content": 0.011350931599736214, "timestamp": "2025-09-30 22:17:26.450371", "step": 3186, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:26.490598", "step": 3186, "epoch": 2 }, { "type": "loss", "content": 0.007605713326483965, "timestamp": "2025-09-30 22:17:26.501057", "step": 3187, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:26.535283", "step": 3187, "epoch": 2 }, { "type": "loss", "content": 0.009302949532866478, "timestamp": "2025-09-30 22:17:26.566550", "step": 3188, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:26.609061", "step": 3188, "epoch": 2 }, { "type": "loss", "content": 0.006530994549393654, "timestamp": "2025-09-30 22:17:26.619059", "step": 3189, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:26.663734", "step": 3189, "epoch": 2 }, { "type": "loss", "content": 0.01011677272617817, "timestamp": "2025-09-30 22:17:26.676135", "step": 3190, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:26.710463", "step": 3190, "epoch": 2 }, { "type": "loss", "content": 0.007625059224665165, "timestamp": "2025-09-30 22:17:26.721483", "step": 3191, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:26.759797", "step": 3191, "epoch": 2 }, { "type": "loss", "content": 0.017014721408486366, "timestamp": "2025-09-30 22:17:26.794495", "step": 3192, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:26.832516", "step": 3192, "epoch": 2 }, { "type": "loss", "content": 0.006755263078957796, "timestamp": "2025-09-30 22:17:26.843346", "step": 3193, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:26.890062", "step": 3193, "epoch": 2 }, { "type": "loss", "content": 0.007832643575966358, "timestamp": "2025-09-30 22:17:26.898058", "step": 3194, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:26.932531", "step": 3194, "epoch": 2 }, { "type": "loss", "content": 0.010002926923334599, "timestamp": "2025-09-30 22:17:26.945069", "step": 3195, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:26.978892", "step": 3195, "epoch": 2 }, { "type": "loss", "content": 0.01744220219552517, "timestamp": "2025-09-30 22:17:27.007215", "step": 3196, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:27.043984", "step": 3196, "epoch": 2 }, { "type": "loss", "content": 0.007402005139738321, "timestamp": "2025-09-30 22:17:27.052765", "step": 3197, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:27.089681", "step": 3197, "epoch": 2 }, { "type": "loss", "content": 0.008514383807778358, "timestamp": "2025-09-30 22:17:27.100784", "step": 3198, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:27.134732", "step": 3198, "epoch": 2 }, { "type": "loss", "content": 0.0062779998406767845, "timestamp": "2025-09-30 22:17:27.141876", "step": 3199, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:27.183349", "step": 3199, "epoch": 2 }, { "type": "loss", "content": 0.011944870464503765, "timestamp": "2025-09-30 22:17:27.211784", "step": 3200, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:27.254086", "step": 3200, "epoch": 2 }, { "type": "loss", "content": 0.007012359332293272, "timestamp": "2025-09-30 22:17:27.258677", "step": 3201, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:17:27.298105", "step": 3201, "epoch": 2 }, { "type": "loss", "content": 0.009369988925755024, "timestamp": "2025-09-30 22:17:27.302521", "step": 3202, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:27.357373", "step": 3202, "epoch": 2 }, { "type": "loss", "content": 0.0048002698458731174, "timestamp": "2025-09-30 22:17:27.364401", "step": 3203, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:27.400172", "step": 3203, "epoch": 2 }, { "type": "loss", "content": 0.008321247063577175, "timestamp": "2025-09-30 22:17:27.431280", "step": 3204, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:27.466594", "step": 3204, "epoch": 2 }, { "type": "loss", "content": 0.00541403004899621, "timestamp": "2025-09-30 22:17:27.474584", "step": 3205, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:27.513087", "step": 3205, "epoch": 2 }, { "type": "loss", "content": 0.015202262438833714, "timestamp": "2025-09-30 22:17:27.525629", "step": 3206, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:27.581054", "step": 3206, "epoch": 2 }, { "type": "loss", "content": 0.013311178423464298, "timestamp": "2025-09-30 22:17:27.594792", "step": 3207, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:27.654637", "step": 3207, "epoch": 2 }, { "type": "loss", "content": 0.0028243535198271275, "timestamp": "2025-09-30 22:17:27.683283", "step": 3208, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:27.717994", "step": 3208, "epoch": 2 }, { "type": "loss", "content": 0.0033892698120325804, "timestamp": "2025-09-30 22:17:27.723271", "step": 3209, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:27.759010", "step": 3209, "epoch": 2 }, { "type": "loss", "content": 0.010153659619390965, "timestamp": "2025-09-30 22:17:27.766708", "step": 3210, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:27.809288", "step": 3210, "epoch": 2 }, { "type": "loss", "content": 0.012320290319621563, "timestamp": "2025-09-30 22:17:27.820286", "step": 3211, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:27.863111", "step": 3211, "epoch": 2 }, { "type": "loss", "content": 0.007215319201350212, "timestamp": "2025-09-30 22:17:27.891293", "step": 3212, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:27.936336", "step": 3212, "epoch": 2 }, { "type": "loss", "content": 0.015331793576478958, "timestamp": "2025-09-30 22:17:27.944240", "step": 3213, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:27.982474", "step": 3213, "epoch": 2 }, { "type": "loss", "content": 0.009551141411066055, "timestamp": "2025-09-30 22:17:27.996280", "step": 3214, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:28.035491", "step": 3214, "epoch": 2 }, { "type": "loss", "content": 0.005144950468093157, "timestamp": "2025-09-30 22:17:28.045610", "step": 3215, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:28.084333", "step": 3215, "epoch": 2 }, { "type": "loss", "content": 0.006618922110646963, "timestamp": "2025-09-30 22:17:28.116243", "step": 3216, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:28.151348", "step": 3216, "epoch": 2 }, { "type": "loss", "content": 0.006120654754340649, "timestamp": "2025-09-30 22:17:28.159966", "step": 3217, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:28.195262", "step": 3217, "epoch": 2 }, { "type": "loss", "content": 0.00691972067579627, "timestamp": "2025-09-30 22:17:28.207788", "step": 3218, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:28.249274", "step": 3218, "epoch": 2 }, { "type": "loss", "content": 0.0032606294844299555, "timestamp": "2025-09-30 22:17:28.259644", "step": 3219, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:28.294929", "step": 3219, "epoch": 2 }, { "type": "loss", "content": 0.007216659840196371, "timestamp": "2025-09-30 22:17:28.326023", "step": 3220, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:17:31.178026", "step": 3220, "epoch": 2 }, { "type": "pplx", "content": 5.789587159789891, "timestamp": "2025-09-30 22:17:31.180389", "step": 3220, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:31.224306", "step": 3220, "epoch": 2 }, { "type": "loss", "content": 0.005082657095044851, "timestamp": "2025-09-30 22:17:31.233090", "step": 3221, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:31.280156", "step": 3221, "epoch": 2 }, { "type": "loss", "content": 0.008041946217417717, "timestamp": "2025-09-30 22:17:31.291004", "step": 3222, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:31.338897", "step": 3222, "epoch": 2 }, { "type": "loss", "content": 0.002013694727793336, "timestamp": "2025-09-30 22:17:31.346168", "step": 3223, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:31.394610", "step": 3223, "epoch": 2 }, { "type": "loss", "content": 0.008580495603382587, "timestamp": "2025-09-30 22:17:31.426231", "step": 3224, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:31.462942", "step": 3224, "epoch": 2 }, { "type": "loss", "content": 0.006691499147564173, "timestamp": "2025-09-30 22:17:31.476016", "step": 3225, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:31.524494", "step": 3225, "epoch": 2 }, { "type": "loss", "content": 0.011357761919498444, "timestamp": "2025-09-30 22:17:31.532316", "step": 3226, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:31.584656", "step": 3226, "epoch": 2 }, { "type": "loss", "content": 0.017691265791654587, "timestamp": "2025-09-30 22:17:31.598351", "step": 3227, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:31.638250", "step": 3227, "epoch": 2 }, { "type": "loss", "content": 0.003643968142569065, "timestamp": "2025-09-30 22:17:31.672341", "step": 3228, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:31.719198", "step": 3228, "epoch": 2 }, { "type": "loss", "content": 0.00515143945813179, "timestamp": "2025-09-30 22:17:31.728992", "step": 3229, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:31.788540", "step": 3229, "epoch": 2 }, { "type": "loss", "content": 0.012103057466447353, "timestamp": "2025-09-30 22:17:31.801964", "step": 3230, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:31.847874", "step": 3230, "epoch": 2 }, { "type": "loss", "content": 0.004100447986274958, "timestamp": "2025-09-30 22:17:31.858011", "step": 3231, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:31.893545", "step": 3231, "epoch": 2 }, { "type": "loss", "content": 0.012488999404013157, "timestamp": "2025-09-30 22:17:31.925254", "step": 3232, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:31.960712", "step": 3232, "epoch": 2 }, { "type": "loss", "content": 0.011300384066998959, "timestamp": "2025-09-30 22:17:31.965460", "step": 3233, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:32.001360", "step": 3233, "epoch": 2 }, { "type": "loss", "content": 0.01623716950416565, "timestamp": "2025-09-30 22:17:32.008978", "step": 3234, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:32.042234", "step": 3234, "epoch": 2 }, { "type": "loss", "content": 0.004639583174139261, "timestamp": "2025-09-30 22:17:32.049858", "step": 3235, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:32.086210", "step": 3235, "epoch": 2 }, { "type": "loss", "content": 0.0062539177015423775, "timestamp": "2025-09-30 22:17:32.114714", "step": 3236, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:32.150941", "step": 3236, "epoch": 2 }, { "type": "loss", "content": 0.0031055191066116095, "timestamp": "2025-09-30 22:17:32.163505", "step": 3237, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:32.204379", "step": 3237, "epoch": 2 }, { "type": "loss", "content": 0.004861655179411173, "timestamp": "2025-09-30 22:17:32.215314", "step": 3238, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:32.274324", "step": 3238, "epoch": 2 }, { "type": "loss", "content": 0.005641878582537174, "timestamp": "2025-09-30 22:17:32.285251", "step": 3239, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:32.346365", "step": 3239, "epoch": 2 }, { "type": "loss", "content": 0.007063304539769888, "timestamp": "2025-09-30 22:17:32.381043", "step": 3240, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:32.415296", "step": 3240, "epoch": 2 }, { "type": "loss", "content": 0.008499303832650185, "timestamp": "2025-09-30 22:17:32.423942", "step": 3241, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:32.462734", "step": 3241, "epoch": 2 }, { "type": "loss", "content": 0.006319647654891014, "timestamp": "2025-09-30 22:17:32.473748", "step": 3242, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:32.527219", "step": 3242, "epoch": 2 }, { "type": "loss", "content": 0.006031288765370846, "timestamp": "2025-09-30 22:17:32.537411", "step": 3243, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:32.587161", "step": 3243, "epoch": 2 }, { "type": "loss", "content": 0.00663584191352129, "timestamp": "2025-09-30 22:17:32.618294", "step": 3244, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:32.668909", "step": 3244, "epoch": 2 }, { "type": "loss", "content": 0.003447463968768716, "timestamp": "2025-09-30 22:17:32.676773", "step": 3245, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:32.721416", "step": 3245, "epoch": 2 }, { "type": "loss", "content": 0.002404881175607443, "timestamp": "2025-09-30 22:17:32.728481", "step": 3246, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:32.766877", "step": 3246, "epoch": 2 }, { "type": "loss", "content": 0.001494319294579327, "timestamp": "2025-09-30 22:17:32.777325", "step": 3247, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:32.814898", "step": 3247, "epoch": 2 }, { "type": "loss", "content": 0.0030386210419237614, "timestamp": "2025-09-30 22:17:32.843751", "step": 3248, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:32.880359", "step": 3248, "epoch": 2 }, { "type": "loss", "content": 0.003679205197840929, "timestamp": "2025-09-30 22:17:32.892985", "step": 3249, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:32.929262", "step": 3249, "epoch": 2 }, { "type": "loss", "content": 0.012156999669969082, "timestamp": "2025-09-30 22:17:32.937238", "step": 3250, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:32.985666", "step": 3250, "epoch": 2 }, { "type": "loss", "content": 0.00451328419148922, "timestamp": "2025-09-30 22:17:32.996745", "step": 3251, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:33.059606", "step": 3251, "epoch": 2 }, { "type": "loss", "content": 0.010425938293337822, "timestamp": "2025-09-30 22:17:33.093081", "step": 3252, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:33.129960", "step": 3252, "epoch": 2 }, { "type": "loss", "content": 0.009341493248939514, "timestamp": "2025-09-30 22:17:33.138700", "step": 3253, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:33.175675", "step": 3253, "epoch": 2 }, { "type": "loss", "content": 0.014587215147912502, "timestamp": "2025-09-30 22:17:33.188089", "step": 3254, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:33.233720", "step": 3254, "epoch": 2 }, { "type": "loss", "content": 0.02171679027378559, "timestamp": "2025-09-30 22:17:33.241555", "step": 3255, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:33.290111", "step": 3255, "epoch": 2 }, { "type": "loss", "content": 0.024895792827010155, "timestamp": "2025-09-30 22:17:33.321677", "step": 3256, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:33.364219", "step": 3256, "epoch": 2 }, { "type": "loss", "content": 0.004205517005175352, "timestamp": "2025-09-30 22:17:33.374857", "step": 3257, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:33.423616", "step": 3257, "epoch": 2 }, { "type": "loss", "content": 0.005508000962436199, "timestamp": "2025-09-30 22:17:33.435985", "step": 3258, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:33.495350", "step": 3258, "epoch": 2 }, { "type": "loss", "content": 0.0037440420128405094, "timestamp": "2025-09-30 22:17:33.508684", "step": 3259, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:33.555723", "step": 3259, "epoch": 2 }, { "type": "loss", "content": 0.010127577930688858, "timestamp": "2025-09-30 22:17:33.587730", "step": 3260, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:33.639550", "step": 3260, "epoch": 2 }, { "type": "loss", "content": 0.003603320801630616, "timestamp": "2025-09-30 22:17:33.644305", "step": 3261, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:33.685371", "step": 3261, "epoch": 2 }, { "type": "loss", "content": 0.008757252246141434, "timestamp": "2025-09-30 22:17:33.696481", "step": 3262, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:33.740643", "step": 3262, "epoch": 2 }, { "type": "loss", "content": 0.002329410519450903, "timestamp": "2025-09-30 22:17:33.749796", "step": 3263, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:33.795534", "step": 3263, "epoch": 2 }, { "type": "loss", "content": 0.007676136679947376, "timestamp": "2025-09-30 22:17:33.826621", "step": 3264, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:33.880600", "step": 3264, "epoch": 2 }, { "type": "loss", "content": 0.008814936503767967, "timestamp": "2025-09-30 22:17:33.889485", "step": 3265, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:33.958757", "step": 3265, "epoch": 2 }, { "type": "loss", "content": 0.005114632658660412, "timestamp": "2025-09-30 22:17:33.966716", "step": 3266, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:34.004810", "step": 3266, "epoch": 2 }, { "type": "loss", "content": 0.008615276776254177, "timestamp": "2025-09-30 22:17:34.015924", "step": 3267, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:34.067863", "step": 3267, "epoch": 2 }, { "type": "loss", "content": 0.004307322669774294, "timestamp": "2025-09-30 22:17:34.095939", "step": 3268, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:34.146349", "step": 3268, "epoch": 2 }, { "type": "loss", "content": 0.007383343297988176, "timestamp": "2025-09-30 22:17:34.151313", "step": 3269, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:34.185988", "step": 3269, "epoch": 2 }, { "type": "loss", "content": 0.0019633248448371887, "timestamp": "2025-09-30 22:17:34.192853", "step": 3270, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:17:34.237473", "step": 3270, "epoch": 2 }, { "type": "loss", "content": 0.002463744254782796, "timestamp": "2025-09-30 22:17:34.246953", "step": 3271, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:34.288958", "step": 3271, "epoch": 2 }, { "type": "loss", "content": 0.00518502201884985, "timestamp": "2025-09-30 22:17:34.320141", "step": 3272, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:34.362128", "step": 3272, "epoch": 2 }, { "type": "loss", "content": 0.011479921638965607, "timestamp": "2025-09-30 22:17:34.372316", "step": 3273, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:34.418658", "step": 3273, "epoch": 2 }, { "type": "loss", "content": 0.0314481146633625, "timestamp": "2025-09-30 22:17:34.426482", "step": 3274, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:17:34.466740", "step": 3274, "epoch": 2 }, { "type": "loss", "content": 0.005712313577532768, "timestamp": "2025-09-30 22:17:34.470856", "step": 3275, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:34.512323", "step": 3275, "epoch": 2 }, { "type": "loss", "content": 0.00974821113049984, "timestamp": "2025-09-30 22:17:34.540050", "step": 3276, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:17:34.574917", "step": 3276, "epoch": 2 }, { "type": "loss", "content": 0.001180569757707417, "timestamp": "2025-09-30 22:17:34.578067", "step": 3277, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:34.618444", "step": 3277, "epoch": 2 }, { "type": "loss", "content": 0.0017637682612985373, "timestamp": "2025-09-30 22:17:34.625588", "step": 3278, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:34.663061", "step": 3278, "epoch": 2 }, { "type": "loss", "content": 0.010607711970806122, "timestamp": "2025-09-30 22:17:34.671010", "step": 3279, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:34.708105", "step": 3279, "epoch": 2 }, { "type": "loss", "content": 0.007635840680450201, "timestamp": "2025-09-30 22:17:34.739223", "step": 3280, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:34.785410", "step": 3280, "epoch": 2 }, { "type": "loss", "content": 0.004081249237060547, "timestamp": "2025-09-30 22:17:34.791094", "step": 3281, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:34.829227", "step": 3281, "epoch": 2 }, { "type": "loss", "content": 0.0059111518785357475, "timestamp": "2025-09-30 22:17:34.839676", "step": 3282, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:34.901873", "step": 3282, "epoch": 2 }, { "type": "loss", "content": 0.0024872845970094204, "timestamp": "2025-09-30 22:17:34.912871", "step": 3283, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:34.950458", "step": 3283, "epoch": 2 }, { "type": "loss", "content": 0.004989683162420988, "timestamp": "2025-09-30 22:17:34.979509", "step": 3284, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:35.034244", "step": 3284, "epoch": 2 }, { "type": "loss", "content": 0.009897114709019661, "timestamp": "2025-09-30 22:17:35.039439", "step": 3285, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:35.075416", "step": 3285, "epoch": 2 }, { "type": "loss", "content": 0.005108351353555918, "timestamp": "2025-09-30 22:17:35.083242", "step": 3286, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:35.128331", "step": 3286, "epoch": 2 }, { "type": "loss", "content": 0.005318902898579836, "timestamp": "2025-09-30 22:17:35.136013", "step": 3287, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:35.170538", "step": 3287, "epoch": 2 }, { "type": "loss", "content": 0.012161512859165668, "timestamp": "2025-09-30 22:17:35.203664", "step": 3288, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:35.265403", "step": 3288, "epoch": 2 }, { "type": "loss", "content": 0.015478991903364658, "timestamp": "2025-09-30 22:17:35.270991", "step": 3289, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:35.304251", "step": 3289, "epoch": 2 }, { "type": "loss", "content": 0.0025228005833923817, "timestamp": "2025-09-30 22:17:35.312193", "step": 3290, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:35.349958", "step": 3290, "epoch": 2 }, { "type": "loss", "content": 0.0016414644196629524, "timestamp": "2025-09-30 22:17:35.363258", "step": 3291, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:35.397765", "step": 3291, "epoch": 2 }, { "type": "loss", "content": 0.005938539747148752, "timestamp": "2025-09-30 22:17:35.426600", "step": 3292, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:35.462537", "step": 3292, "epoch": 2 }, { "type": "loss", "content": 0.007894609123468399, "timestamp": "2025-09-30 22:17:35.471291", "step": 3293, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:35.516774", "step": 3293, "epoch": 2 }, { "type": "loss", "content": 0.0023811052087694407, "timestamp": "2025-09-30 22:17:35.529340", "step": 3294, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:35.566487", "step": 3294, "epoch": 2 }, { "type": "loss", "content": 0.008082598447799683, "timestamp": "2025-09-30 22:17:35.579860", "step": 3295, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:35.626022", "step": 3295, "epoch": 2 }, { "type": "loss", "content": 0.005375884938985109, "timestamp": "2025-09-30 22:17:35.653966", "step": 3296, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:17:35.696371", "step": 3296, "epoch": 2 }, { "type": "loss", "content": 0.00774806085973978, "timestamp": "2025-09-30 22:17:35.698592", "step": 3297, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:35.736937", "step": 3297, "epoch": 2 }, { "type": "loss", "content": 0.005233556963503361, "timestamp": "2025-09-30 22:17:35.746878", "step": 3298, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:35.785245", "step": 3298, "epoch": 2 }, { "type": "loss", "content": 0.006909824907779694, "timestamp": "2025-09-30 22:17:35.793074", "step": 3299, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:35.829508", "step": 3299, "epoch": 2 }, { "type": "loss", "content": 0.01188234519213438, "timestamp": "2025-09-30 22:17:35.857519", "step": 3300, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:35.892184", "step": 3300, "epoch": 2 }, { "type": "loss", "content": 0.013236339204013348, "timestamp": "2025-09-30 22:17:35.899979", "step": 3301, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:35.939161", "step": 3301, "epoch": 2 }, { "type": "loss", "content": 0.015004783868789673, "timestamp": "2025-09-30 22:17:35.949480", "step": 3302, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-30 22:17:35.992587", "step": 3302, "epoch": 2 }, { "type": "loss", "content": 0.009418581612408161, "timestamp": "2025-09-30 22:17:35.995227", "step": 3303, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:36.054667", "step": 3303, "epoch": 2 }, { "type": "loss", "content": 0.001688284333795309, "timestamp": "2025-09-30 22:17:36.082750", "step": 3304, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:36.137861", "step": 3304, "epoch": 2 }, { "type": "loss", "content": 0.005806570872664452, "timestamp": "2025-09-30 22:17:36.143385", "step": 3305, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:36.179188", "step": 3305, "epoch": 2 }, { "type": "loss", "content": 0.004986909683793783, "timestamp": "2025-09-30 22:17:36.186991", "step": 3306, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:36.223613", "step": 3306, "epoch": 2 }, { "type": "loss", "content": 0.010555509477853775, "timestamp": "2025-09-30 22:17:36.234679", "step": 3307, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:36.274583", "step": 3307, "epoch": 2 }, { "type": "loss", "content": 0.013262015767395496, "timestamp": "2025-09-30 22:17:36.303415", "step": 3308, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:36.353681", "step": 3308, "epoch": 2 }, { "type": "loss", "content": 0.01061546616256237, "timestamp": "2025-09-30 22:17:36.363422", "step": 3309, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:36.403844", "step": 3309, "epoch": 2 }, { "type": "loss", "content": 0.0010201644618064165, "timestamp": "2025-09-30 22:17:36.411451", "step": 3310, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:36.447811", "step": 3310, "epoch": 2 }, { "type": "loss", "content": 0.00881708599627018, "timestamp": "2025-09-30 22:17:36.460370", "step": 3311, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:36.495146", "step": 3311, "epoch": 2 }, { "type": "loss", "content": 0.010540482588112354, "timestamp": "2025-09-30 22:17:36.528305", "step": 3312, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:36.571754", "step": 3312, "epoch": 2 }, { "type": "loss", "content": 0.01337836030870676, "timestamp": "2025-09-30 22:17:36.580569", "step": 3313, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:36.626113", "step": 3313, "epoch": 2 }, { "type": "loss", "content": 0.004954076837748289, "timestamp": "2025-09-30 22:17:36.637169", "step": 3314, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:36.673315", "step": 3314, "epoch": 2 }, { "type": "loss", "content": 0.00841221772134304, "timestamp": "2025-09-30 22:17:36.685101", "step": 3315, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:36.730308", "step": 3315, "epoch": 2 }, { "type": "loss", "content": 0.003916066139936447, "timestamp": "2025-09-30 22:17:36.758607", "step": 3316, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:36.808811", "step": 3316, "epoch": 2 }, { "type": "loss", "content": 0.01567855477333069, "timestamp": "2025-09-30 22:17:36.813652", "step": 3317, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:36.858349", "step": 3317, "epoch": 2 }, { "type": "loss", "content": 0.004972139373421669, "timestamp": "2025-09-30 22:17:36.869366", "step": 3318, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:36.916073", "step": 3318, "epoch": 2 }, { "type": "loss", "content": 0.007721779402345419, "timestamp": "2025-09-30 22:17:36.926473", "step": 3319, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:36.973344", "step": 3319, "epoch": 2 }, { "type": "loss", "content": 0.005826642271131277, "timestamp": "2025-09-30 22:17:37.006318", "step": 3320, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:37.050434", "step": 3320, "epoch": 2 }, { "type": "loss", "content": 0.004990190267562866, "timestamp": "2025-09-30 22:17:37.058523", "step": 3321, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:37.105387", "step": 3321, "epoch": 2 }, { "type": "loss", "content": 0.018866149708628654, "timestamp": "2025-09-30 22:17:37.112991", "step": 3322, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:37.161559", "step": 3322, "epoch": 2 }, { "type": "loss", "content": 0.005869357846677303, "timestamp": "2025-09-30 22:17:37.169419", "step": 3323, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:37.225011", "step": 3323, "epoch": 2 }, { "type": "loss", "content": 0.003536415984854102, "timestamp": "2025-09-30 22:17:37.256051", "step": 3324, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:37.292119", "step": 3324, "epoch": 2 }, { "type": "loss", "content": 0.026185350492596626, "timestamp": "2025-09-30 22:17:37.300786", "step": 3325, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:37.352448", "step": 3325, "epoch": 2 }, { "type": "loss", "content": 0.0053767370991408825, "timestamp": "2025-09-30 22:17:37.363515", "step": 3326, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:37.406920", "step": 3326, "epoch": 2 }, { "type": "loss", "content": 0.002467031590640545, "timestamp": "2025-09-30 22:17:37.418091", "step": 3327, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:37.477631", "step": 3327, "epoch": 2 }, { "type": "loss", "content": 0.005745346192270517, "timestamp": "2025-09-30 22:17:37.512227", "step": 3328, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:37.562587", "step": 3328, "epoch": 2 }, { "type": "loss", "content": 0.015376402996480465, "timestamp": "2025-09-30 22:17:37.575631", "step": 3329, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:37.612460", "step": 3329, "epoch": 2 }, { "type": "loss", "content": 0.010871312581002712, "timestamp": "2025-09-30 22:17:37.619553", "step": 3330, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:37.655803", "step": 3330, "epoch": 2 }, { "type": "loss", "content": 0.00525900861248374, "timestamp": "2025-09-30 22:17:37.668303", "step": 3331, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:17:37.726102", "step": 3331, "epoch": 2 }, { "type": "loss", "content": 0.004298144951462746, "timestamp": "2025-09-30 22:17:37.764628", "step": 3332, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:37.801609", "step": 3332, "epoch": 2 }, { "type": "loss", "content": 0.01475045271217823, "timestamp": "2025-09-30 22:17:37.811527", "step": 3333, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:37.851595", "step": 3333, "epoch": 2 }, { "type": "loss", "content": 0.0043676551431417465, "timestamp": "2025-09-30 22:17:37.863844", "step": 3334, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:37.909794", "step": 3334, "epoch": 2 }, { "type": "loss", "content": 0.006549290381371975, "timestamp": "2025-09-30 22:17:37.920038", "step": 3335, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:17:40.628791", "step": 3335, "epoch": 2 }, { "type": "pplx", "content": 5.908082136822788, "timestamp": "2025-09-30 22:17:40.633117", "step": 3335, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:40.667904", "step": 3335, "epoch": 2 }, { "type": "loss", "content": 0.008388176560401917, "timestamp": "2025-09-30 22:17:40.697972", "step": 3336, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:40.736012", "step": 3336, "epoch": 2 }, { "type": "loss", "content": 0.015180687420070171, "timestamp": "2025-09-30 22:17:40.742334", "step": 3337, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:40.780621", "step": 3337, "epoch": 2 }, { "type": "loss", "content": 0.008127564564347267, "timestamp": "2025-09-30 22:17:40.788041", "step": 3338, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:40.827745", "step": 3338, "epoch": 2 }, { "type": "loss", "content": 0.004592224024236202, "timestamp": "2025-09-30 22:17:40.838664", "step": 3339, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:40.876597", "step": 3339, "epoch": 2 }, { "type": "loss", "content": 0.012107564136385918, "timestamp": "2025-09-30 22:17:40.907880", "step": 3340, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:40.944489", "step": 3340, "epoch": 2 }, { "type": "loss", "content": 0.0023953975178301334, "timestamp": "2025-09-30 22:17:40.957634", "step": 3341, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:40.996945", "step": 3341, "epoch": 2 }, { "type": "loss", "content": 0.007857408374547958, "timestamp": "2025-09-30 22:17:41.004601", "step": 3342, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:41.042145", "step": 3342, "epoch": 2 }, { "type": "loss", "content": 0.014286278747022152, "timestamp": "2025-09-30 22:17:41.049848", "step": 3343, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:41.085224", "step": 3343, "epoch": 2 }, { "type": "loss", "content": 0.008866170421242714, "timestamp": "2025-09-30 22:17:41.117114", "step": 3344, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:41.156705", "step": 3344, "epoch": 2 }, { "type": "loss", "content": 0.011578583158552647, "timestamp": "2025-09-30 22:17:41.165467", "step": 3345, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:41.205114", "step": 3345, "epoch": 2 }, { "type": "loss", "content": 0.002213649218901992, "timestamp": "2025-09-30 22:17:41.217691", "step": 3346, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:41.263687", "step": 3346, "epoch": 2 }, { "type": "loss", "content": 0.00874285213649273, "timestamp": "2025-09-30 22:17:41.277019", "step": 3347, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:41.320364", "step": 3347, "epoch": 2 }, { "type": "loss", "content": 0.005169060546904802, "timestamp": "2025-09-30 22:17:41.351437", "step": 3348, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:41.391340", "step": 3348, "epoch": 2 }, { "type": "loss", "content": 0.0063797226175665855, "timestamp": "2025-09-30 22:17:41.399993", "step": 3349, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:41.437305", "step": 3349, "epoch": 2 }, { "type": "loss", "content": 0.0072136567905545235, "timestamp": "2025-09-30 22:17:41.448354", "step": 3350, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:41.480755", "step": 3350, "epoch": 2 }, { "type": "loss", "content": 0.0035704094916582108, "timestamp": "2025-09-30 22:17:41.487804", "step": 3351, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:41.524211", "step": 3351, "epoch": 2 }, { "type": "loss", "content": 0.0043285614810884, "timestamp": "2025-09-30 22:17:41.552832", "step": 3352, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:41.601607", "step": 3352, "epoch": 2 }, { "type": "loss", "content": 0.0018978390144184232, "timestamp": "2025-09-30 22:17:41.614676", "step": 3353, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:41.647926", "step": 3353, "epoch": 2 }, { "type": "loss", "content": 0.009518872015178204, "timestamp": "2025-09-30 22:17:41.659218", "step": 3354, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:41.694646", "step": 3354, "epoch": 2 }, { "type": "loss", "content": 0.003425286151468754, "timestamp": "2025-09-30 22:17:41.705715", "step": 3355, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:41.738857", "step": 3355, "epoch": 2 }, { "type": "loss", "content": 0.005201328080147505, "timestamp": "2025-09-30 22:17:41.770107", "step": 3356, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:41.804820", "step": 3356, "epoch": 2 }, { "type": "loss", "content": 0.003430295269936323, "timestamp": "2025-09-30 22:17:41.814749", "step": 3357, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:41.861476", "step": 3357, "epoch": 2 }, { "type": "loss", "content": 0.003906135680153966, "timestamp": "2025-09-30 22:17:41.873690", "step": 3358, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:41.912285", "step": 3358, "epoch": 2 }, { "type": "loss", "content": 0.0035449049901217222, "timestamp": "2025-09-30 22:17:41.920913", "step": 3359, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:41.957649", "step": 3359, "epoch": 2 }, { "type": "loss", "content": 0.009680942632257938, "timestamp": "2025-09-30 22:17:41.989504", "step": 3360, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:42.026858", "step": 3360, "epoch": 2 }, { "type": "loss", "content": 0.0037081630434840918, "timestamp": "2025-09-30 22:17:42.034827", "step": 3361, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:42.073550", "step": 3361, "epoch": 2 }, { "type": "loss", "content": 0.0030928482301533222, "timestamp": "2025-09-30 22:17:42.081169", "step": 3362, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:42.114832", "step": 3362, "epoch": 2 }, { "type": "loss", "content": 0.0008025756105780602, "timestamp": "2025-09-30 22:17:42.121925", "step": 3363, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:42.161439", "step": 3363, "epoch": 2 }, { "type": "loss", "content": 0.020035002380609512, "timestamp": "2025-09-30 22:17:42.190234", "step": 3364, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:42.225423", "step": 3364, "epoch": 2 }, { "type": "loss", "content": 0.0031864179763942957, "timestamp": "2025-09-30 22:17:42.231052", "step": 3365, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:42.276076", "step": 3365, "epoch": 2 }, { "type": "loss", "content": 0.004433062393218279, "timestamp": "2025-09-30 22:17:42.288380", "step": 3366, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:42.329253", "step": 3366, "epoch": 2 }, { "type": "loss", "content": 0.0015743272379040718, "timestamp": "2025-09-30 22:17:42.342599", "step": 3367, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:42.386007", "step": 3367, "epoch": 2 }, { "type": "loss", "content": 0.014348004944622517, "timestamp": "2025-09-30 22:17:42.414997", "step": 3368, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:42.458692", "step": 3368, "epoch": 2 }, { "type": "loss", "content": 0.018920985981822014, "timestamp": "2025-09-30 22:17:42.471067", "step": 3369, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:42.509253", "step": 3369, "epoch": 2 }, { "type": "loss", "content": 0.010625313967466354, "timestamp": "2025-09-30 22:17:42.516494", "step": 3370, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:42.559117", "step": 3370, "epoch": 2 }, { "type": "loss", "content": 0.004841940477490425, "timestamp": "2025-09-30 22:17:42.570219", "step": 3371, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:42.618317", "step": 3371, "epoch": 2 }, { "type": "loss", "content": 0.011649803258478642, "timestamp": "2025-09-30 22:17:42.652487", "step": 3372, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:42.686308", "step": 3372, "epoch": 2 }, { "type": "loss", "content": 0.006236497312784195, "timestamp": "2025-09-30 22:17:42.694305", "step": 3373, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:42.741176", "step": 3373, "epoch": 2 }, { "type": "loss", "content": 0.004768477752804756, "timestamp": "2025-09-30 22:17:42.752463", "step": 3374, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:42.790754", "step": 3374, "epoch": 2 }, { "type": "loss", "content": 0.0019870330579578876, "timestamp": "2025-09-30 22:17:42.804464", "step": 3375, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:42.843290", "step": 3375, "epoch": 2 }, { "type": "loss", "content": 0.003379875561222434, "timestamp": "2025-09-30 22:17:42.876765", "step": 3376, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:42.916730", "step": 3376, "epoch": 2 }, { "type": "loss", "content": 0.005048112478107214, "timestamp": "2025-09-30 22:17:42.927546", "step": 3377, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:42.970004", "step": 3377, "epoch": 2 }, { "type": "loss", "content": 0.006450078450143337, "timestamp": "2025-09-30 22:17:42.982568", "step": 3378, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:43.020617", "step": 3378, "epoch": 2 }, { "type": "loss", "content": 0.011793630197644234, "timestamp": "2025-09-30 22:17:43.031898", "step": 3379, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:43.065366", "step": 3379, "epoch": 2 }, { "type": "loss", "content": 0.0065678758546710014, "timestamp": "2025-09-30 22:17:43.098885", "step": 3380, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:43.152071", "step": 3380, "epoch": 2 }, { "type": "loss", "content": 0.0019863827619701624, "timestamp": "2025-09-30 22:17:43.164739", "step": 3381, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:43.201711", "step": 3381, "epoch": 2 }, { "type": "loss", "content": 0.006069038063287735, "timestamp": "2025-09-30 22:17:43.214045", "step": 3382, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:43.251080", "step": 3382, "epoch": 2 }, { "type": "loss", "content": 0.00993566308170557, "timestamp": "2025-09-30 22:17:43.263549", "step": 3383, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:43.304789", "step": 3383, "epoch": 2 }, { "type": "loss", "content": 0.004957782104611397, "timestamp": "2025-09-30 22:17:43.338001", "step": 3384, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:43.379473", "step": 3384, "epoch": 2 }, { "type": "loss", "content": 0.0007455699960701168, "timestamp": "2025-09-30 22:17:43.387422", "step": 3385, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:43.427876", "step": 3385, "epoch": 2 }, { "type": "loss", "content": 0.0004897300386801362, "timestamp": "2025-09-30 22:17:43.441570", "step": 3386, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:43.476274", "step": 3386, "epoch": 2 }, { "type": "loss", "content": 0.005194354802370071, "timestamp": "2025-09-30 22:17:43.488632", "step": 3387, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:43.530174", "step": 3387, "epoch": 2 }, { "type": "loss", "content": 0.0049303011037409306, "timestamp": "2025-09-30 22:17:43.563369", "step": 3388, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:43.607448", "step": 3388, "epoch": 2 }, { "type": "loss", "content": 0.0001996582723222673, "timestamp": "2025-09-30 22:17:43.617338", "step": 3389, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:43.655086", "step": 3389, "epoch": 2 }, { "type": "loss", "content": 0.0012007271870970726, "timestamp": "2025-09-30 22:17:43.668425", "step": 3390, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:43.712980", "step": 3390, "epoch": 2 }, { "type": "loss", "content": 0.011003163643181324, "timestamp": "2025-09-30 22:17:43.725312", "step": 3391, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:43.758097", "step": 3391, "epoch": 2 }, { "type": "loss", "content": 0.0011313065188005567, "timestamp": "2025-09-30 22:17:43.791422", "step": 3392, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:43.826884", "step": 3392, "epoch": 2 }, { "type": "loss", "content": 0.00030222817440517247, "timestamp": "2025-09-30 22:17:43.839562", "step": 3393, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:43.879451", "step": 3393, "epoch": 2 }, { "type": "loss", "content": 0.015398084186017513, "timestamp": "2025-09-30 22:17:43.892044", "step": 3394, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:43.930176", "step": 3394, "epoch": 2 }, { "type": "loss", "content": 0.0030468441545963287, "timestamp": "2025-09-30 22:17:43.941370", "step": 3395, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:43.992633", "step": 3395, "epoch": 2 }, { "type": "loss", "content": 0.013181681744754314, "timestamp": "2025-09-30 22:17:44.026950", "step": 3396, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:44.061030", "step": 3396, "epoch": 2 }, { "type": "loss", "content": 0.0019466944504529238, "timestamp": "2025-09-30 22:17:44.071857", "step": 3397, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:44.114840", "step": 3397, "epoch": 2 }, { "type": "loss", "content": 0.014047914184629917, "timestamp": "2025-09-30 22:17:44.125973", "step": 3398, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:44.168860", "step": 3398, "epoch": 2 }, { "type": "loss", "content": 0.00035534953349269927, "timestamp": "2025-09-30 22:17:44.182279", "step": 3399, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:44.220083", "step": 3399, "epoch": 2 }, { "type": "loss", "content": 0.000862656335812062, "timestamp": "2025-09-30 22:17:44.249110", "step": 3400, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:44.297001", "step": 3400, "epoch": 2 }, { "type": "loss", "content": 0.0008597745327278972, "timestamp": "2025-09-30 22:17:44.305138", "step": 3401, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:44.340173", "step": 3401, "epoch": 2 }, { "type": "loss", "content": 0.0005968649056740105, "timestamp": "2025-09-30 22:17:44.351254", "step": 3402, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:44.397077", "step": 3402, "epoch": 2 }, { "type": "loss", "content": 0.00504825497046113, "timestamp": "2025-09-30 22:17:44.410822", "step": 3403, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:44.445798", "step": 3403, "epoch": 2 }, { "type": "loss", "content": 0.000195363987586461, "timestamp": "2025-09-30 22:17:44.474648", "step": 3404, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:44.506608", "step": 3404, "epoch": 2 }, { "type": "loss", "content": 0.0005043658311478794, "timestamp": "2025-09-30 22:17:44.514840", "step": 3405, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:17:44.552532", "step": 3405, "epoch": 2 }, { "type": "loss", "content": 0.0029634914826601744, "timestamp": "2025-09-30 22:17:44.566580", "step": 3406, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:44.611334", "step": 3406, "epoch": 2 }, { "type": "loss", "content": 0.002190728671848774, "timestamp": "2025-09-30 22:17:44.622488", "step": 3407, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:44.665511", "step": 3407, "epoch": 2 }, { "type": "loss", "content": 0.000769341888371855, "timestamp": "2025-09-30 22:17:44.696718", "step": 3408, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:44.733529", "step": 3408, "epoch": 2 }, { "type": "loss", "content": 0.0019631364848464727, "timestamp": "2025-09-30 22:17:44.742439", "step": 3409, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:44.780362", "step": 3409, "epoch": 2 }, { "type": "loss", "content": 0.012203425168991089, "timestamp": "2025-09-30 22:17:44.792794", "step": 3410, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:44.841594", "step": 3410, "epoch": 2 }, { "type": "loss", "content": 0.0056638531386852264, "timestamp": "2025-09-30 22:17:44.855016", "step": 3411, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:44.895881", "step": 3411, "epoch": 2 }, { "type": "loss", "content": 0.014068983495235443, "timestamp": "2025-09-30 22:17:44.927391", "step": 3412, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:44.960622", "step": 3412, "epoch": 2 }, { "type": "loss", "content": 0.0052124448120594025, "timestamp": "2025-09-30 22:17:44.968566", "step": 3413, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:45.007118", "step": 3413, "epoch": 2 }, { "type": "loss", "content": 0.003837403142824769, "timestamp": "2025-09-30 22:17:45.019580", "step": 3414, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:45.058590", "step": 3414, "epoch": 2 }, { "type": "loss", "content": 0.006270041223615408, "timestamp": "2025-09-30 22:17:45.069847", "step": 3415, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:45.103550", "step": 3415, "epoch": 2 }, { "type": "loss", "content": 0.027436494827270508, "timestamp": "2025-09-30 22:17:45.136973", "step": 3416, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:45.171601", "step": 3416, "epoch": 2 }, { "type": "loss", "content": 0.0063796150498092175, "timestamp": "2025-09-30 22:17:45.182102", "step": 3417, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:45.221814", "step": 3417, "epoch": 2 }, { "type": "loss", "content": 0.002977534895762801, "timestamp": "2025-09-30 22:17:45.234186", "step": 3418, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:45.276203", "step": 3418, "epoch": 2 }, { "type": "loss", "content": 0.008533847518265247, "timestamp": "2025-09-30 22:17:45.288575", "step": 3419, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:45.333244", "step": 3419, "epoch": 2 }, { "type": "loss", "content": 0.003514854470267892, "timestamp": "2025-09-30 22:17:45.367791", "step": 3420, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:45.404767", "step": 3420, "epoch": 2 }, { "type": "loss", "content": 0.0006031044758856297, "timestamp": "2025-09-30 22:17:45.417435", "step": 3421, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:17:45.475120", "step": 3421, "epoch": 2 }, { "type": "loss", "content": 0.0008695040596649051, "timestamp": "2025-09-30 22:17:45.491020", "step": 3422, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:17:45.539413", "step": 3422, "epoch": 2 }, { "type": "loss", "content": 0.0012744531268253922, "timestamp": "2025-09-30 22:17:45.555012", "step": 3423, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:45.592006", "step": 3423, "epoch": 2 }, { "type": "loss", "content": 0.0037018214352428913, "timestamp": "2025-09-30 22:17:45.626561", "step": 3424, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:45.669164", "step": 3424, "epoch": 2 }, { "type": "loss", "content": 0.01598265767097473, "timestamp": "2025-09-30 22:17:45.678089", "step": 3425, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:45.711075", "step": 3425, "epoch": 2 }, { "type": "loss", "content": 0.010136940516531467, "timestamp": "2025-09-30 22:17:45.722389", "step": 3426, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:45.762026", "step": 3426, "epoch": 2 }, { "type": "loss", "content": 0.01466772984713316, "timestamp": "2025-09-30 22:17:45.772482", "step": 3427, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:45.814834", "step": 3427, "epoch": 2 }, { "type": "loss", "content": 0.008609617128968239, "timestamp": "2025-09-30 22:17:45.848261", "step": 3428, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:45.888198", "step": 3428, "epoch": 2 }, { "type": "loss", "content": 0.010983388870954514, "timestamp": "2025-09-30 22:17:45.897285", "step": 3429, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:45.932513", "step": 3429, "epoch": 2 }, { "type": "loss", "content": 0.0038160153198987246, "timestamp": "2025-09-30 22:17:45.945009", "step": 3430, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:45.979710", "step": 3430, "epoch": 2 }, { "type": "loss", "content": 0.014309203252196312, "timestamp": "2025-09-30 22:17:45.990345", "step": 3431, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:46.028259", "step": 3431, "epoch": 2 }, { "type": "loss", "content": 0.003925409633666277, "timestamp": "2025-09-30 22:17:46.061632", "step": 3432, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:46.102008", "step": 3432, "epoch": 2 }, { "type": "loss", "content": 0.005317303352057934, "timestamp": "2025-09-30 22:17:46.114646", "step": 3433, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:46.150606", "step": 3433, "epoch": 2 }, { "type": "loss", "content": 0.011380461975932121, "timestamp": "2025-09-30 22:17:46.161786", "step": 3434, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:46.196231", "step": 3434, "epoch": 2 }, { "type": "loss", "content": 0.002438656520098448, "timestamp": "2025-09-30 22:17:46.203912", "step": 3435, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:46.244673", "step": 3435, "epoch": 2 }, { "type": "loss", "content": 0.002902389271184802, "timestamp": "2025-09-30 22:17:46.275794", "step": 3436, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:46.308790", "step": 3436, "epoch": 2 }, { "type": "loss", "content": 0.005086651537567377, "timestamp": "2025-09-30 22:17:46.317639", "step": 3437, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:46.356635", "step": 3437, "epoch": 2 }, { "type": "loss", "content": 0.0034991842694580555, "timestamp": "2025-09-30 22:17:46.369137", "step": 3438, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:46.416681", "step": 3438, "epoch": 2 }, { "type": "loss", "content": 0.008994112722575665, "timestamp": "2025-09-30 22:17:46.430526", "step": 3439, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:46.469464", "step": 3439, "epoch": 2 }, { "type": "loss", "content": 0.0012463306775316596, "timestamp": "2025-09-30 22:17:46.498898", "step": 3440, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:46.533581", "step": 3440, "epoch": 2 }, { "type": "loss", "content": 0.002525878604501486, "timestamp": "2025-09-30 22:17:46.543583", "step": 3441, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:46.587531", "step": 3441, "epoch": 2 }, { "type": "loss", "content": 0.003564007580280304, "timestamp": "2025-09-30 22:17:46.601407", "step": 3442, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:46.638808", "step": 3442, "epoch": 2 }, { "type": "loss", "content": 0.005786378402262926, "timestamp": "2025-09-30 22:17:46.649344", "step": 3443, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:46.685036", "step": 3443, "epoch": 2 }, { "type": "loss", "content": 0.005785984918475151, "timestamp": "2025-09-30 22:17:46.718258", "step": 3444, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:17:46.765835", "step": 3444, "epoch": 2 }, { "type": "loss", "content": 0.0027834847569465637, "timestamp": "2025-09-30 22:17:46.779153", "step": 3445, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:46.818020", "step": 3445, "epoch": 2 }, { "type": "loss", "content": 0.01306731253862381, "timestamp": "2025-09-30 22:17:46.830596", "step": 3446, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:46.868195", "step": 3446, "epoch": 2 }, { "type": "loss", "content": 0.0030403793789446354, "timestamp": "2025-09-30 22:17:46.881634", "step": 3447, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:46.916669", "step": 3447, "epoch": 2 }, { "type": "loss", "content": 0.004225566517561674, "timestamp": "2025-09-30 22:17:46.950929", "step": 3448, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:46.990716", "step": 3448, "epoch": 2 }, { "type": "loss", "content": 0.013057212345302105, "timestamp": "2025-09-30 22:17:47.001354", "step": 3449, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:47.040337", "step": 3449, "epoch": 2 }, { "type": "loss", "content": 0.009223368018865585, "timestamp": "2025-09-30 22:17:47.054046", "step": 3450, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:17:49.542905", "step": 3450, "epoch": 2 }, { "type": "pplx", "content": 6.117527749085999, "timestamp": "2025-09-30 22:17:49.547521", "step": 3450, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:49.583827", "step": 3450, "epoch": 2 }, { "type": "loss", "content": 0.019778696820139885, "timestamp": "2025-09-30 22:17:49.597174", "step": 3451, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:49.631448", "step": 3451, "epoch": 2 }, { "type": "loss", "content": 0.00811647716909647, "timestamp": "2025-09-30 22:17:49.664475", "step": 3452, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:49.699916", "step": 3452, "epoch": 2 }, { "type": "loss", "content": 0.008224857039749622, "timestamp": "2025-09-30 22:17:49.707212", "step": 3453, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:49.750293", "step": 3453, "epoch": 2 }, { "type": "loss", "content": 0.013419202528893948, "timestamp": "2025-09-30 22:17:49.761291", "step": 3454, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:49.793686", "step": 3454, "epoch": 2 }, { "type": "loss", "content": 0.010387551970779896, "timestamp": "2025-09-30 22:17:49.806297", "step": 3455, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:17:49.849767", "step": 3455, "epoch": 2 }, { "type": "loss", "content": 0.004591148346662521, "timestamp": "2025-09-30 22:17:49.886823", "step": 3456, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:49.933266", "step": 3456, "epoch": 2 }, { "type": "loss", "content": 0.00521214259788394, "timestamp": "2025-09-30 22:17:49.945955", "step": 3457, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:17:49.992345", "step": 3457, "epoch": 2 }, { "type": "loss", "content": 0.0060057747177779675, "timestamp": "2025-09-30 22:17:50.008046", "step": 3458, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:50.067097", "step": 3458, "epoch": 2 }, { "type": "loss", "content": 0.006628590170294046, "timestamp": "2025-09-30 22:17:50.079340", "step": 3459, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:50.113658", "step": 3459, "epoch": 2 }, { "type": "loss", "content": 0.006081595551222563, "timestamp": "2025-09-30 22:17:50.147100", "step": 3460, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:50.184360", "step": 3460, "epoch": 2 }, { "type": "loss", "content": 0.006910016760230064, "timestamp": "2025-09-30 22:17:50.196159", "step": 3461, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:50.241555", "step": 3461, "epoch": 2 }, { "type": "loss", "content": 0.013972374610602856, "timestamp": "2025-09-30 22:17:50.253865", "step": 3462, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:50.294472", "step": 3462, "epoch": 2 }, { "type": "loss", "content": 0.003051967127248645, "timestamp": "2025-09-30 22:17:50.308247", "step": 3463, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:17:50.340592", "step": 3463, "epoch": 2 }, { "type": "loss", "content": 0.003830001689493656, "timestamp": "2025-09-30 22:17:50.366939", "step": 3464, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:17:50.419176", "step": 3464, "epoch": 2 }, { "type": "loss", "content": 0.014183532446622849, "timestamp": "2025-09-30 22:17:50.434285", "step": 3465, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:17:50.484990", "step": 3465, "epoch": 2 }, { "type": "loss", "content": 0.01136015821248293, "timestamp": "2025-09-30 22:17:50.489566", "step": 3466, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:17:50.532623", "step": 3466, "epoch": 2 }, { "type": "loss", "content": 0.00248980475589633, "timestamp": "2025-09-30 22:17:50.548728", "step": 3467, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:50.590292", "step": 3467, "epoch": 2 }, { "type": "loss", "content": 0.006607360672205687, "timestamp": "2025-09-30 22:17:50.621798", "step": 3468, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:50.661099", "step": 3468, "epoch": 2 }, { "type": "loss", "content": 0.0017212983220815659, "timestamp": "2025-09-30 22:17:50.669748", "step": 3469, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:50.715904", "step": 3469, "epoch": 2 }, { "type": "loss", "content": 0.011916225776076317, "timestamp": "2025-09-30 22:17:50.726974", "step": 3470, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:50.761985", "step": 3470, "epoch": 2 }, { "type": "loss", "content": 0.007234348449856043, "timestamp": "2025-09-30 22:17:50.772521", "step": 3471, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:50.812777", "step": 3471, "epoch": 2 }, { "type": "loss", "content": 0.006829807534813881, "timestamp": "2025-09-30 22:17:50.845975", "step": 3472, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:50.882281", "step": 3472, "epoch": 2 }, { "type": "loss", "content": 0.005056047346442938, "timestamp": "2025-09-30 22:17:50.887237", "step": 3473, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:50.920750", "step": 3473, "epoch": 2 }, { "type": "loss", "content": 0.0025959117338061333, "timestamp": "2025-09-30 22:17:50.932979", "step": 3474, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:50.974300", "step": 3474, "epoch": 2 }, { "type": "loss", "content": 0.001975255785509944, "timestamp": "2025-09-30 22:17:50.985368", "step": 3475, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:51.025023", "step": 3475, "epoch": 2 }, { "type": "loss", "content": 0.01245441660284996, "timestamp": "2025-09-30 22:17:51.054026", "step": 3476, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:17:51.089583", "step": 3476, "epoch": 2 }, { "type": "loss", "content": 0.029889047145843506, "timestamp": "2025-09-30 22:17:51.098786", "step": 3477, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:51.134470", "step": 3477, "epoch": 2 }, { "type": "loss", "content": 0.007431174162775278, "timestamp": "2025-09-30 22:17:51.146807", "step": 3478, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:17:51.195084", "step": 3478, "epoch": 2 }, { "type": "loss", "content": 0.005341086536645889, "timestamp": "2025-09-30 22:17:51.211051", "step": 3479, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:17:51.254072", "step": 3479, "epoch": 2 }, { "type": "loss", "content": 0.004321379121392965, "timestamp": "2025-09-30 22:17:51.282018", "step": 3480, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:51.315880", "step": 3480, "epoch": 2 }, { "type": "loss", "content": 0.002522587776184082, "timestamp": "2025-09-30 22:17:51.326216", "step": 3481, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:51.362293", "step": 3481, "epoch": 2 }, { "type": "loss", "content": 0.016913603991270065, "timestamp": "2025-09-30 22:17:51.374628", "step": 3482, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:51.408980", "step": 3482, "epoch": 2 }, { "type": "loss", "content": 0.015241632238030434, "timestamp": "2025-09-30 22:17:51.419367", "step": 3483, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:51.467287", "step": 3483, "epoch": 2 }, { "type": "loss", "content": 0.0023043362889438868, "timestamp": "2025-09-30 22:17:51.502006", "step": 3484, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:51.536552", "step": 3484, "epoch": 2 }, { "type": "loss", "content": 0.014905478805303574, "timestamp": "2025-09-30 22:17:51.545160", "step": 3485, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:51.579022", "step": 3485, "epoch": 2 }, { "type": "loss", "content": 0.004559694789350033, "timestamp": "2025-09-30 22:17:51.591393", "step": 3486, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:51.625234", "step": 3486, "epoch": 2 }, { "type": "loss", "content": 0.023312093690037727, "timestamp": "2025-09-30 22:17:51.637802", "step": 3487, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:51.680058", "step": 3487, "epoch": 2 }, { "type": "loss", "content": 0.00656821159645915, "timestamp": "2025-09-30 22:17:51.713274", "step": 3488, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:51.748771", "step": 3488, "epoch": 2 }, { "type": "loss", "content": 0.00758517486974597, "timestamp": "2025-09-30 22:17:51.761837", "step": 3489, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:51.800343", "step": 3489, "epoch": 2 }, { "type": "loss", "content": 0.004730802029371262, "timestamp": "2025-09-30 22:17:51.813728", "step": 3490, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:51.850002", "step": 3490, "epoch": 2 }, { "type": "loss", "content": 0.00482148164883256, "timestamp": "2025-09-30 22:17:51.863406", "step": 3491, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:51.900098", "step": 3491, "epoch": 2 }, { "type": "loss", "content": 0.007809313479810953, "timestamp": "2025-09-30 22:17:51.933344", "step": 3492, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:51.968297", "step": 3492, "epoch": 2 }, { "type": "loss", "content": 0.0036748633719980717, "timestamp": "2025-09-30 22:17:51.978398", "step": 3493, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:52.011884", "step": 3493, "epoch": 2 }, { "type": "loss", "content": 0.008129787631332874, "timestamp": "2025-09-30 22:17:52.022186", "step": 3494, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:52.059863", "step": 3494, "epoch": 2 }, { "type": "loss", "content": 0.007218873593956232, "timestamp": "2025-09-30 22:17:52.067457", "step": 3495, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:52.105686", "step": 3495, "epoch": 2 }, { "type": "loss", "content": 0.012301747687160969, "timestamp": "2025-09-30 22:17:52.136866", "step": 3496, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:52.171717", "step": 3496, "epoch": 2 }, { "type": "loss", "content": 0.003263867227360606, "timestamp": "2025-09-30 22:17:52.184775", "step": 3497, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:52.218457", "step": 3497, "epoch": 2 }, { "type": "loss", "content": 0.012883387506008148, "timestamp": "2025-09-30 22:17:52.225621", "step": 3498, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:52.263542", "step": 3498, "epoch": 2 }, { "type": "loss", "content": 0.011431973427534103, "timestamp": "2025-09-30 22:17:52.277252", "step": 3499, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:52.310154", "step": 3499, "epoch": 2 }, { "type": "loss", "content": 0.006922499742358923, "timestamp": "2025-09-30 22:17:52.338898", "step": 3500, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 3500", "timestamp": "2025-09-30 22:17:57.308118", "step": 3500, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:57.353016", "step": 3500, "epoch": 2 }, { "type": "loss", "content": 0.0016732515068724751, "timestamp": "2025-09-30 22:17:57.359525", "step": 3501, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:57.391776", "step": 3501, "epoch": 2 }, { "type": "loss", "content": 0.0008775260648690164, "timestamp": "2025-09-30 22:17:57.404037", "step": 3502, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:57.438395", "step": 3502, "epoch": 2 }, { "type": "loss", "content": 0.0034057104494422674, "timestamp": "2025-09-30 22:17:57.445586", "step": 3503, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:57.483387", "step": 3503, "epoch": 2 }, { "type": "loss", "content": 0.0009829505579546094, "timestamp": "2025-09-30 22:17:57.517367", "step": 3504, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:57.562341", "step": 3504, "epoch": 2 }, { "type": "loss", "content": 0.014472831040620804, "timestamp": "2025-09-30 22:17:57.571036", "step": 3505, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:57.603373", "step": 3505, "epoch": 2 }, { "type": "loss", "content": 0.006887474562972784, "timestamp": "2025-09-30 22:17:57.611385", "step": 3506, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:57.645486", "step": 3506, "epoch": 2 }, { "type": "loss", "content": 0.006549729034304619, "timestamp": "2025-09-30 22:17:57.655734", "step": 3507, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:57.695999", "step": 3507, "epoch": 2 }, { "type": "loss", "content": 0.02756788209080696, "timestamp": "2025-09-30 22:17:57.724837", "step": 3508, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:57.762996", "step": 3508, "epoch": 2 }, { "type": "loss", "content": 0.010234571993350983, "timestamp": "2025-09-30 22:17:57.773464", "step": 3509, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:57.818115", "step": 3509, "epoch": 2 }, { "type": "loss", "content": 0.002638920210301876, "timestamp": "2025-09-30 22:17:57.831508", "step": 3510, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:57.874789", "step": 3510, "epoch": 2 }, { "type": "loss", "content": 0.006166706793010235, "timestamp": "2025-09-30 22:17:57.888566", "step": 3511, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:17:57.937779", "step": 3511, "epoch": 2 }, { "type": "loss", "content": 0.006264224648475647, "timestamp": "2025-09-30 22:17:57.974840", "step": 3512, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:58.017635", "step": 3512, "epoch": 2 }, { "type": "loss", "content": 0.008886733092367649, "timestamp": "2025-09-30 22:17:58.030306", "step": 3513, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:58.068493", "step": 3513, "epoch": 2 }, { "type": "loss", "content": 0.012475165538489819, "timestamp": "2025-09-30 22:17:58.081059", "step": 3514, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:58.121910", "step": 3514, "epoch": 2 }, { "type": "loss", "content": 0.004459563177078962, "timestamp": "2025-09-30 22:17:58.132995", "step": 3515, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:58.188590", "step": 3515, "epoch": 2 }, { "type": "loss", "content": 0.005173417739570141, "timestamp": "2025-09-30 22:17:58.223207", "step": 3516, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:17:58.260455", "step": 3516, "epoch": 2 }, { "type": "loss", "content": 0.011100078001618385, "timestamp": "2025-09-30 22:17:58.273770", "step": 3517, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:58.316037", "step": 3517, "epoch": 2 }, { "type": "loss", "content": 0.0042218053713440895, "timestamp": "2025-09-30 22:17:58.324028", "step": 3518, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:58.361455", "step": 3518, "epoch": 2 }, { "type": "loss", "content": 0.002848744625225663, "timestamp": "2025-09-30 22:17:58.374025", "step": 3519, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:58.411963", "step": 3519, "epoch": 2 }, { "type": "loss", "content": 0.005674897227436304, "timestamp": "2025-09-30 22:17:58.440795", "step": 3520, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:17:58.475140", "step": 3520, "epoch": 2 }, { "type": "loss", "content": 0.0039841290563344955, "timestamp": "2025-09-30 22:17:58.480054", "step": 3521, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:58.512964", "step": 3521, "epoch": 2 }, { "type": "loss", "content": 0.006728755310177803, "timestamp": "2025-09-30 22:17:58.526478", "step": 3522, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:58.566534", "step": 3522, "epoch": 2 }, { "type": "loss", "content": 0.008942547254264355, "timestamp": "2025-09-30 22:17:58.576948", "step": 3523, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:17:58.609868", "step": 3523, "epoch": 2 }, { "type": "loss", "content": 0.006240403279662132, "timestamp": "2025-09-30 22:17:58.638372", "step": 3524, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:58.671331", "step": 3524, "epoch": 2 }, { "type": "loss", "content": 0.004982698708772659, "timestamp": "2025-09-30 22:17:58.682350", "step": 3525, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:17:58.735511", "step": 3525, "epoch": 2 }, { "type": "loss", "content": 0.005394322331994772, "timestamp": "2025-09-30 22:17:58.752678", "step": 3526, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:17:58.797192", "step": 3526, "epoch": 2 }, { "type": "loss", "content": 0.012957288883626461, "timestamp": "2025-09-30 22:17:58.813316", "step": 3527, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:58.849195", "step": 3527, "epoch": 2 }, { "type": "loss", "content": 0.015166162513196468, "timestamp": "2025-09-30 22:17:58.883382", "step": 3528, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:58.921823", "step": 3528, "epoch": 2 }, { "type": "loss", "content": 0.00798399280756712, "timestamp": "2025-09-30 22:17:58.931785", "step": 3529, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:58.978640", "step": 3529, "epoch": 2 }, { "type": "loss", "content": 0.012991075403988361, "timestamp": "2025-09-30 22:17:58.992061", "step": 3530, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:59.030718", "step": 3530, "epoch": 2 }, { "type": "loss", "content": 0.008770058862864971, "timestamp": "2025-09-30 22:17:59.044061", "step": 3531, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:59.087615", "step": 3531, "epoch": 2 }, { "type": "loss", "content": 0.011841779574751854, "timestamp": "2025-09-30 22:17:59.121902", "step": 3532, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:17:59.160113", "step": 3532, "epoch": 2 }, { "type": "loss", "content": 0.00944326352328062, "timestamp": "2025-09-30 22:17:59.173289", "step": 3533, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:59.206108", "step": 3533, "epoch": 2 }, { "type": "loss", "content": 0.005587635096162558, "timestamp": "2025-09-30 22:17:59.216373", "step": 3534, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:59.254155", "step": 3534, "epoch": 2 }, { "type": "loss", "content": 0.010135992430150509, "timestamp": "2025-09-30 22:17:59.267917", "step": 3535, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:59.311584", "step": 3535, "epoch": 2 }, { "type": "loss", "content": 0.010656927712261677, "timestamp": "2025-09-30 22:17:59.344817", "step": 3536, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:17:59.387294", "step": 3536, "epoch": 2 }, { "type": "loss", "content": 0.005624601151794195, "timestamp": "2025-09-30 22:17:59.400338", "step": 3537, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:59.448040", "step": 3537, "epoch": 2 }, { "type": "loss", "content": 0.004959089681506157, "timestamp": "2025-09-30 22:17:59.460572", "step": 3538, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:17:59.495632", "step": 3538, "epoch": 2 }, { "type": "loss", "content": 0.007135627791285515, "timestamp": "2025-09-30 22:17:59.503316", "step": 3539, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:17:59.541476", "step": 3539, "epoch": 2 }, { "type": "loss", "content": 0.009773723781108856, "timestamp": "2025-09-30 22:17:59.575695", "step": 3540, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:17:59.617889", "step": 3540, "epoch": 2 }, { "type": "loss", "content": 0.0032830105628818274, "timestamp": "2025-09-30 22:17:59.633062", "step": 3541, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:59.671348", "step": 3541, "epoch": 2 }, { "type": "loss", "content": 0.01609373278915882, "timestamp": "2025-09-30 22:17:59.683650", "step": 3542, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:17:59.721876", "step": 3542, "epoch": 2 }, { "type": "loss", "content": 0.0061156414449214935, "timestamp": "2025-09-30 22:17:59.734455", "step": 3543, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:17:59.769153", "step": 3543, "epoch": 2 }, { "type": "loss", "content": 0.0034365863539278507, "timestamp": "2025-09-30 22:17:59.802349", "step": 3544, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:59.835512", "step": 3544, "epoch": 2 }, { "type": "loss", "content": 0.005177946295589209, "timestamp": "2025-09-30 22:17:59.844203", "step": 3545, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:17:59.880452", "step": 3545, "epoch": 2 }, { "type": "loss", "content": 0.00885400827974081, "timestamp": "2025-09-30 22:17:59.891367", "step": 3546, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:59.934709", "step": 3546, "epoch": 2 }, { "type": "loss", "content": 0.00847975630313158, "timestamp": "2025-09-30 22:17:59.945110", "step": 3547, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:17:59.994731", "step": 3547, "epoch": 2 }, { "type": "loss", "content": 0.005916960071772337, "timestamp": "2025-09-30 22:18:00.025886", "step": 3548, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:00.063673", "step": 3548, "epoch": 2 }, { "type": "loss", "content": 0.0055039809085428715, "timestamp": "2025-09-30 22:18:00.072294", "step": 3549, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:00.115768", "step": 3549, "epoch": 2 }, { "type": "loss", "content": 0.005276626441627741, "timestamp": "2025-09-30 22:18:00.126774", "step": 3550, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:00.168581", "step": 3550, "epoch": 2 }, { "type": "loss", "content": 0.0064256805926561356, "timestamp": "2025-09-30 22:18:00.176222", "step": 3551, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:00.219163", "step": 3551, "epoch": 2 }, { "type": "loss", "content": 0.011961769312620163, "timestamp": "2025-09-30 22:18:00.253763", "step": 3552, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:00.298773", "step": 3552, "epoch": 2 }, { "type": "loss", "content": 0.0013833267148584127, "timestamp": "2025-09-30 22:18:00.311767", "step": 3553, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:00.345731", "step": 3553, "epoch": 2 }, { "type": "loss", "content": 0.0023183198645710945, "timestamp": "2025-09-30 22:18:00.358288", "step": 3554, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:00.393100", "step": 3554, "epoch": 2 }, { "type": "loss", "content": 0.009293297305703163, "timestamp": "2025-09-30 22:18:00.403428", "step": 3555, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:00.444093", "step": 3555, "epoch": 2 }, { "type": "loss", "content": 0.004751947708427906, "timestamp": "2025-09-30 22:18:00.486685", "step": 3556, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:00.533955", "step": 3556, "epoch": 2 }, { "type": "loss", "content": 0.005438275169581175, "timestamp": "2025-09-30 22:18:00.547006", "step": 3557, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:00.586092", "step": 3557, "epoch": 2 }, { "type": "loss", "content": 0.0027588580269366503, "timestamp": "2025-09-30 22:18:00.599803", "step": 3558, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:18:00.648346", "step": 3558, "epoch": 2 }, { "type": "loss", "content": 0.003551148111000657, "timestamp": "2025-09-30 22:18:00.664703", "step": 3559, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:00.701927", "step": 3559, "epoch": 2 }, { "type": "loss", "content": 0.005467879585921764, "timestamp": "2025-09-30 22:18:00.734030", "step": 3560, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:00.780843", "step": 3560, "epoch": 2 }, { "type": "loss", "content": 0.0032824459485709667, "timestamp": "2025-09-30 22:18:00.790777", "step": 3561, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:00.825505", "step": 3561, "epoch": 2 }, { "type": "loss", "content": 0.006720301229506731, "timestamp": "2025-09-30 22:18:00.838056", "step": 3562, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:00.891753", "step": 3562, "epoch": 2 }, { "type": "loss", "content": 0.004803223069757223, "timestamp": "2025-09-30 22:18:00.902055", "step": 3563, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:00.939996", "step": 3563, "epoch": 2 }, { "type": "loss", "content": 0.010715251788496971, "timestamp": "2025-09-30 22:18:00.973434", "step": 3564, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:01.014551", "step": 3564, "epoch": 2 }, { "type": "loss", "content": 0.01167300995439291, "timestamp": "2025-09-30 22:18:01.023360", "step": 3565, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:18:03.579866", "step": 3565, "epoch": 2 }, { "type": "pplx", "content": 5.9511250598265555, "timestamp": "2025-09-30 22:18:03.587462", "step": 3565, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:03.626031", "step": 3565, "epoch": 2 }, { "type": "loss", "content": 0.008313131518661976, "timestamp": "2025-09-30 22:18:03.636593", "step": 3566, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:03.676883", "step": 3566, "epoch": 2 }, { "type": "loss", "content": 0.00039068798650987446, "timestamp": "2025-09-30 22:18:03.689479", "step": 3567, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:03.725437", "step": 3567, "epoch": 2 }, { "type": "loss", "content": 0.0022331487853080034, "timestamp": "2025-09-30 22:18:03.758546", "step": 3568, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:03.799546", "step": 3568, "epoch": 2 }, { "type": "loss", "content": 0.0064695486798882484, "timestamp": "2025-09-30 22:18:03.805245", "step": 3569, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:03.847191", "step": 3569, "epoch": 2 }, { "type": "loss", "content": 0.005519408266991377, "timestamp": "2025-09-30 22:18:03.860630", "step": 3570, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:03.896338", "step": 3570, "epoch": 2 }, { "type": "loss", "content": 0.006128426641225815, "timestamp": "2025-09-30 22:18:03.904310", "step": 3571, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:03.943387", "step": 3571, "epoch": 2 }, { "type": "loss", "content": 0.00431226147338748, "timestamp": "2025-09-30 22:18:03.978004", "step": 3572, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:04.018027", "step": 3572, "epoch": 2 }, { "type": "loss", "content": 0.0014643726171925664, "timestamp": "2025-09-30 22:18:04.025948", "step": 3573, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:04.066185", "step": 3573, "epoch": 2 }, { "type": "loss", "content": 0.005359445232897997, "timestamp": "2025-09-30 22:18:04.073064", "step": 3574, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:04.107715", "step": 3574, "epoch": 2 }, { "type": "loss", "content": 0.0027361405082046986, "timestamp": "2025-09-30 22:18:04.118580", "step": 3575, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:04.164337", "step": 3575, "epoch": 2 }, { "type": "loss", "content": 0.005159073509275913, "timestamp": "2025-09-30 22:18:04.195588", "step": 3576, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:04.232694", "step": 3576, "epoch": 2 }, { "type": "loss", "content": 0.005905451253056526, "timestamp": "2025-09-30 22:18:04.238343", "step": 3577, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:04.276581", "step": 3577, "epoch": 2 }, { "type": "loss", "content": 0.0021716770716011524, "timestamp": "2025-09-30 22:18:04.288924", "step": 3578, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:04.323677", "step": 3578, "epoch": 2 }, { "type": "loss", "content": 0.0011484583374112844, "timestamp": "2025-09-30 22:18:04.335832", "step": 3579, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:04.386138", "step": 3579, "epoch": 2 }, { "type": "loss", "content": 0.008656934835016727, "timestamp": "2025-09-30 22:18:04.420674", "step": 3580, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:04.457080", "step": 3580, "epoch": 2 }, { "type": "loss", "content": 0.006916280835866928, "timestamp": "2025-09-30 22:18:04.470157", "step": 3581, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:04.518334", "step": 3581, "epoch": 2 }, { "type": "loss", "content": 0.0018486609915271401, "timestamp": "2025-09-30 22:18:04.525231", "step": 3582, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:18:04.564350", "step": 3582, "epoch": 2 }, { "type": "loss", "content": 0.02502571791410446, "timestamp": "2025-09-30 22:18:04.573590", "step": 3583, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:18:04.616304", "step": 3583, "epoch": 2 }, { "type": "loss", "content": 0.0017878885846585035, "timestamp": "2025-09-30 22:18:04.641288", "step": 3584, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:04.681221", "step": 3584, "epoch": 2 }, { "type": "loss", "content": 0.003630830440670252, "timestamp": "2025-09-30 22:18:04.690989", "step": 3585, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:04.726034", "step": 3585, "epoch": 2 }, { "type": "loss", "content": 0.0015692427987232804, "timestamp": "2025-09-30 22:18:04.736547", "step": 3586, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:04.769676", "step": 3586, "epoch": 2 }, { "type": "loss", "content": 0.002070869319140911, "timestamp": "2025-09-30 22:18:04.781709", "step": 3587, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:04.835350", "step": 3587, "epoch": 2 }, { "type": "loss", "content": 0.00642598420381546, "timestamp": "2025-09-30 22:18:04.867140", "step": 3588, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:04.905496", "step": 3588, "epoch": 2 }, { "type": "loss", "content": 0.0004540416703093797, "timestamp": "2025-09-30 22:18:04.916457", "step": 3589, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:04.963183", "step": 3589, "epoch": 2 }, { "type": "loss", "content": 0.0027480798307806253, "timestamp": "2025-09-30 22:18:04.971201", "step": 3590, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:05.017289", "step": 3590, "epoch": 2 }, { "type": "loss", "content": 0.004405462648719549, "timestamp": "2025-09-30 22:18:05.029796", "step": 3591, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:05.067979", "step": 3591, "epoch": 2 }, { "type": "loss", "content": 0.004299256484955549, "timestamp": "2025-09-30 22:18:05.101481", "step": 3592, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:05.146310", "step": 3592, "epoch": 2 }, { "type": "loss", "content": 0.02254783734679222, "timestamp": "2025-09-30 22:18:05.154885", "step": 3593, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:05.206608", "step": 3593, "epoch": 2 }, { "type": "loss", "content": 0.005831711459904909, "timestamp": "2025-09-30 22:18:05.219945", "step": 3594, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:05.266857", "step": 3594, "epoch": 2 }, { "type": "loss", "content": 0.011741677299141884, "timestamp": "2025-09-30 22:18:05.274767", "step": 3595, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:05.317630", "step": 3595, "epoch": 2 }, { "type": "loss", "content": 0.006558686029165983, "timestamp": "2025-09-30 22:18:05.348870", "step": 3596, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:05.392171", "step": 3596, "epoch": 2 }, { "type": "loss", "content": 0.0009061984019353986, "timestamp": "2025-09-30 22:18:05.397522", "step": 3597, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:05.437162", "step": 3597, "epoch": 2 }, { "type": "loss", "content": 0.012972662225365639, "timestamp": "2025-09-30 22:18:05.445145", "step": 3598, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:05.513240", "step": 3598, "epoch": 2 }, { "type": "loss", "content": 0.014107991009950638, "timestamp": "2025-09-30 22:18:05.526629", "step": 3599, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:05.562590", "step": 3599, "epoch": 2 }, { "type": "loss", "content": 0.009034661576151848, "timestamp": "2025-09-30 22:18:05.594698", "step": 3600, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:05.634592", "step": 3600, "epoch": 2 }, { "type": "loss", "content": 0.009804898872971535, "timestamp": "2025-09-30 22:18:05.642229", "step": 3601, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:05.695011", "step": 3601, "epoch": 2 }, { "type": "loss", "content": 0.006780870258808136, "timestamp": "2025-09-30 22:18:05.708401", "step": 3602, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-30 22:18:05.762370", "step": 3602, "epoch": 2 }, { "type": "loss", "content": 0.012236690148711205, "timestamp": "2025-09-30 22:18:05.779964", "step": 3603, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:05.826822", "step": 3603, "epoch": 2 }, { "type": "loss", "content": 0.0076903593726456165, "timestamp": "2025-09-30 22:18:05.861410", "step": 3604, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:18:05.900535", "step": 3604, "epoch": 2 }, { "type": "loss", "content": 0.010933350771665573, "timestamp": "2025-09-30 22:18:05.915645", "step": 3605, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:05.953390", "step": 3605, "epoch": 2 }, { "type": "loss", "content": 0.017923496663570404, "timestamp": "2025-09-30 22:18:05.965949", "step": 3606, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:06.017167", "step": 3606, "epoch": 2 }, { "type": "loss", "content": 0.007505920249968767, "timestamp": "2025-09-30 22:18:06.029706", "step": 3607, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:06.084058", "step": 3607, "epoch": 2 }, { "type": "loss", "content": 0.00268744770437479, "timestamp": "2025-09-30 22:18:06.118572", "step": 3608, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:06.156299", "step": 3608, "epoch": 2 }, { "type": "loss", "content": 0.005204895976930857, "timestamp": "2025-09-30 22:18:06.166845", "step": 3609, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:06.201351", "step": 3609, "epoch": 2 }, { "type": "loss", "content": 0.010513301938772202, "timestamp": "2025-09-30 22:18:06.213741", "step": 3610, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:06.252494", "step": 3610, "epoch": 2 }, { "type": "loss", "content": 0.005763137713074684, "timestamp": "2025-09-30 22:18:06.266197", "step": 3611, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:06.301990", "step": 3611, "epoch": 2 }, { "type": "loss", "content": 0.0038706306368112564, "timestamp": "2025-09-30 22:18:06.333284", "step": 3612, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:06.366805", "step": 3612, "epoch": 2 }, { "type": "loss", "content": 0.010753096081316471, "timestamp": "2025-09-30 22:18:06.376876", "step": 3613, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:06.424707", "step": 3613, "epoch": 2 }, { "type": "loss", "content": 0.01010825764387846, "timestamp": "2025-09-30 22:18:06.438500", "step": 3614, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:06.487863", "step": 3614, "epoch": 2 }, { "type": "loss", "content": 0.008509020321071148, "timestamp": "2025-09-30 22:18:06.500388", "step": 3615, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:06.534037", "step": 3615, "epoch": 2 }, { "type": "loss", "content": 0.004479021765291691, "timestamp": "2025-09-30 22:18:06.565350", "step": 3616, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:06.601866", "step": 3616, "epoch": 2 }, { "type": "loss", "content": 0.004267436917871237, "timestamp": "2025-09-30 22:18:06.610562", "step": 3617, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:06.655877", "step": 3617, "epoch": 2 }, { "type": "loss", "content": 0.003969523124396801, "timestamp": "2025-09-30 22:18:06.667272", "step": 3618, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:06.701524", "step": 3618, "epoch": 2 }, { "type": "loss", "content": 0.0031346294563263655, "timestamp": "2025-09-30 22:18:06.709634", "step": 3619, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:06.749965", "step": 3619, "epoch": 2 }, { "type": "loss", "content": 0.0017775017768144608, "timestamp": "2025-09-30 22:18:06.784509", "step": 3620, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:06.824116", "step": 3620, "epoch": 2 }, { "type": "loss", "content": 0.004031859338283539, "timestamp": "2025-09-30 22:18:06.839557", "step": 3621, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:06.877903", "step": 3621, "epoch": 2 }, { "type": "loss", "content": 0.003785045351833105, "timestamp": "2025-09-30 22:18:06.891671", "step": 3622, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:06.940169", "step": 3622, "epoch": 2 }, { "type": "loss", "content": 0.001441820291802287, "timestamp": "2025-09-30 22:18:06.954052", "step": 3623, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:07.013384", "step": 3623, "epoch": 2 }, { "type": "loss", "content": 0.005841195583343506, "timestamp": "2025-09-30 22:18:07.046775", "step": 3624, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:07.091239", "step": 3624, "epoch": 2 }, { "type": "loss", "content": 0.0024000804405659437, "timestamp": "2025-09-30 22:18:07.100823", "step": 3625, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:07.148537", "step": 3625, "epoch": 2 }, { "type": "loss", "content": 0.0025940327905118465, "timestamp": "2025-09-30 22:18:07.161070", "step": 3626, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:07.204166", "step": 3626, "epoch": 2 }, { "type": "loss", "content": 0.004042869433760643, "timestamp": "2025-09-30 22:18:07.220090", "step": 3627, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:07.255740", "step": 3627, "epoch": 2 }, { "type": "loss", "content": 0.0035541884135454893, "timestamp": "2025-09-30 22:18:07.284225", "step": 3628, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:18:07.326175", "step": 3628, "epoch": 2 }, { "type": "loss", "content": 0.01492700632661581, "timestamp": "2025-09-30 22:18:07.341752", "step": 3629, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:07.375854", "step": 3629, "epoch": 2 }, { "type": "loss", "content": 0.005095267202705145, "timestamp": "2025-09-30 22:18:07.386831", "step": 3630, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:07.422853", "step": 3630, "epoch": 2 }, { "type": "loss", "content": 0.0027024373412132263, "timestamp": "2025-09-30 22:18:07.433809", "step": 3631, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:07.470453", "step": 3631, "epoch": 2 }, { "type": "loss", "content": 0.01257782056927681, "timestamp": "2025-09-30 22:18:07.501664", "step": 3632, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:18:07.543839", "step": 3632, "epoch": 2 }, { "type": "loss", "content": 0.0018136730650439858, "timestamp": "2025-09-30 22:18:07.559505", "step": 3633, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:07.608170", "step": 3633, "epoch": 2 }, { "type": "loss", "content": 0.012361763045191765, "timestamp": "2025-09-30 22:18:07.621980", "step": 3634, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:07.662675", "step": 3634, "epoch": 2 }, { "type": "loss", "content": 0.006774898152798414, "timestamp": "2025-09-30 22:18:07.676567", "step": 3635, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:18:07.726282", "step": 3635, "epoch": 2 }, { "type": "loss", "content": 0.0051282295025885105, "timestamp": "2025-09-30 22:18:07.764407", "step": 3636, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:07.808806", "step": 3636, "epoch": 2 }, { "type": "loss", "content": 0.013438411988317966, "timestamp": "2025-09-30 22:18:07.817381", "step": 3637, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:07.863841", "step": 3637, "epoch": 2 }, { "type": "loss", "content": 0.005512099713087082, "timestamp": "2025-09-30 22:18:07.876073", "step": 3638, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:07.917012", "step": 3638, "epoch": 2 }, { "type": "loss", "content": 0.006519559770822525, "timestamp": "2025-09-30 22:18:07.930701", "step": 3639, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:07.968613", "step": 3639, "epoch": 2 }, { "type": "loss", "content": 0.004097971599549055, "timestamp": "2025-09-30 22:18:08.003521", "step": 3640, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:08.054465", "step": 3640, "epoch": 2 }, { "type": "loss", "content": 0.013717074878513813, "timestamp": "2025-09-30 22:18:08.062783", "step": 3641, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:08.172347", "step": 3641, "epoch": 2 }, { "type": "loss", "content": 0.003827743697911501, "timestamp": "2025-09-30 22:18:08.185713", "step": 3642, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:08.244585", "step": 3642, "epoch": 2 }, { "type": "loss", "content": 0.00614333339035511, "timestamp": "2025-09-30 22:18:08.258295", "step": 3643, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:08.326999", "step": 3643, "epoch": 2 }, { "type": "loss", "content": 0.001711672986857593, "timestamp": "2025-09-30 22:18:08.360023", "step": 3644, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:08.411885", "step": 3644, "epoch": 2 }, { "type": "loss", "content": 0.005505403969436884, "timestamp": "2025-09-30 22:18:08.421670", "step": 3645, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:08.484141", "step": 3645, "epoch": 2 }, { "type": "loss", "content": 0.002894895849749446, "timestamp": "2025-09-30 22:18:08.494948", "step": 3646, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:08.550174", "step": 3646, "epoch": 2 }, { "type": "loss", "content": 0.007011787500232458, "timestamp": "2025-09-30 22:18:08.560245", "step": 3647, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:08.673565", "step": 3647, "epoch": 2 }, { "type": "loss", "content": 0.00981463398784399, "timestamp": "2025-09-30 22:18:08.705387", "step": 3648, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:08.766177", "step": 3648, "epoch": 2 }, { "type": "loss", "content": 0.0028689431492239237, "timestamp": "2025-09-30 22:18:08.782746", "step": 3649, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:08.851521", "step": 3649, "epoch": 2 }, { "type": "loss", "content": 0.0021789041347801685, "timestamp": "2025-09-30 22:18:08.858237", "step": 3650, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:08.916571", "step": 3650, "epoch": 2 }, { "type": "loss", "content": 0.00205511343665421, "timestamp": "2025-09-30 22:18:08.929929", "step": 3651, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:08.995585", "step": 3651, "epoch": 2 }, { "type": "loss", "content": 0.007314031012356281, "timestamp": "2025-09-30 22:18:09.028974", "step": 3652, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:09.090624", "step": 3652, "epoch": 2 }, { "type": "loss", "content": 0.005639208946377039, "timestamp": "2025-09-30 22:18:09.100618", "step": 3653, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:09.162051", "step": 3653, "epoch": 2 }, { "type": "loss", "content": 0.009423138573765755, "timestamp": "2025-09-30 22:18:09.174588", "step": 3654, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:09.245390", "step": 3654, "epoch": 2 }, { "type": "loss", "content": 0.007394131738692522, "timestamp": "2025-09-30 22:18:09.258764", "step": 3655, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:09.334706", "step": 3655, "epoch": 2 }, { "type": "loss", "content": 0.004264110699295998, "timestamp": "2025-09-30 22:18:09.367936", "step": 3656, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:09.447573", "step": 3656, "epoch": 2 }, { "type": "loss", "content": 0.004879354499280453, "timestamp": "2025-09-30 22:18:09.460917", "step": 3657, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:09.513441", "step": 3657, "epoch": 2 }, { "type": "loss", "content": 0.004300492815673351, "timestamp": "2025-09-30 22:18:09.525662", "step": 3658, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 592 ], "flops": 17560600598464 }, "timestamp": "2025-09-30 22:18:09.602920", "step": 3658, "epoch": 2 }, { "type": "loss", "content": 0.0033125756308436394, "timestamp": "2025-09-30 22:18:09.624011", "step": 3659, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:09.667155", "step": 3659, "epoch": 2 }, { "type": "loss", "content": 0.0047155385836958885, "timestamp": "2025-09-30 22:18:09.701400", "step": 3660, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:09.742719", "step": 3660, "epoch": 2 }, { "type": "loss", "content": 0.0008582398295402527, "timestamp": "2025-09-30 22:18:09.747104", "step": 3661, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:09.787113", "step": 3661, "epoch": 2 }, { "type": "loss", "content": 0.00034500984475016594, "timestamp": "2025-09-30 22:18:09.799403", "step": 3662, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:09.840305", "step": 3662, "epoch": 2 }, { "type": "loss", "content": 0.002383210463449359, "timestamp": "2025-09-30 22:18:09.853698", "step": 3663, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:09.910050", "step": 3663, "epoch": 2 }, { "type": "loss", "content": 0.0007570055895484984, "timestamp": "2025-09-30 22:18:09.943472", "step": 3664, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:09.982836", "step": 3664, "epoch": 2 }, { "type": "loss", "content": 0.0018659909255802631, "timestamp": "2025-09-30 22:18:09.995484", "step": 3665, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:10.032010", "step": 3665, "epoch": 2 }, { "type": "loss", "content": 0.004569348879158497, "timestamp": "2025-09-30 22:18:10.043038", "step": 3666, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:10.085512", "step": 3666, "epoch": 2 }, { "type": "loss", "content": 0.016304440796375275, "timestamp": "2025-09-30 22:18:10.097866", "step": 3667, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:10.146805", "step": 3667, "epoch": 2 }, { "type": "loss", "content": 0.016486702486872673, "timestamp": "2025-09-30 22:18:10.181555", "step": 3668, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:10.216748", "step": 3668, "epoch": 2 }, { "type": "loss", "content": 0.007853741757571697, "timestamp": "2025-09-30 22:18:10.229392", "step": 3669, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:10.276372", "step": 3669, "epoch": 2 }, { "type": "loss", "content": 0.008043302223086357, "timestamp": "2025-09-30 22:18:10.288986", "step": 3670, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:10.327790", "step": 3670, "epoch": 2 }, { "type": "loss", "content": 0.005524238105863333, "timestamp": "2025-09-30 22:18:10.341529", "step": 3671, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:10.398078", "step": 3671, "epoch": 2 }, { "type": "loss", "content": 0.013961317017674446, "timestamp": "2025-09-30 22:18:10.431345", "step": 3672, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:10.471043", "step": 3672, "epoch": 2 }, { "type": "loss", "content": 0.02146792970597744, "timestamp": "2025-09-30 22:18:10.480894", "step": 3673, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:10.523058", "step": 3673, "epoch": 2 }, { "type": "loss", "content": 0.013621116988360882, "timestamp": "2025-09-30 22:18:10.534335", "step": 3674, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:10.575568", "step": 3674, "epoch": 2 }, { "type": "loss", "content": 0.04091104492545128, "timestamp": "2025-09-30 22:18:10.585857", "step": 3675, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:10.631802", "step": 3675, "epoch": 2 }, { "type": "loss", "content": 0.005582859739661217, "timestamp": "2025-09-30 22:18:10.666662", "step": 3676, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:10.729822", "step": 3676, "epoch": 2 }, { "type": "loss", "content": 0.0049339765682816505, "timestamp": "2025-09-30 22:18:10.742877", "step": 3677, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:18:10.792187", "step": 3677, "epoch": 2 }, { "type": "loss", "content": 0.006935216952115297, "timestamp": "2025-09-30 22:18:10.809936", "step": 3678, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:10.853920", "step": 3678, "epoch": 2 }, { "type": "loss", "content": 0.011995434761047363, "timestamp": "2025-09-30 22:18:10.869857", "step": 3679, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:10.918129", "step": 3679, "epoch": 2 }, { "type": "loss", "content": 0.00030699989292770624, "timestamp": "2025-09-30 22:18:10.952403", "step": 3680, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:18:13.583130", "step": 3680, "epoch": 2 }, { "type": "pplx", "content": 5.94899863726725, "timestamp": "2025-09-30 22:18:13.589379", "step": 3680, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:13.624541", "step": 3680, "epoch": 2 }, { "type": "loss", "content": 0.0005120789282955229, "timestamp": "2025-09-30 22:18:13.633037", "step": 3681, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:13.669467", "step": 3681, "epoch": 2 }, { "type": "loss", "content": 0.005108590237796307, "timestamp": "2025-09-30 22:18:13.682862", "step": 3682, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:13.727819", "step": 3682, "epoch": 2 }, { "type": "loss", "content": 0.009081355296075344, "timestamp": "2025-09-30 22:18:13.741147", "step": 3683, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:13.782928", "step": 3683, "epoch": 2 }, { "type": "loss", "content": 0.0060650804080069065, "timestamp": "2025-09-30 22:18:13.817521", "step": 3684, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:13.857715", "step": 3684, "epoch": 2 }, { "type": "loss", "content": 0.004745157901197672, "timestamp": "2025-09-30 22:18:13.870357", "step": 3685, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:13.913884", "step": 3685, "epoch": 2 }, { "type": "loss", "content": 0.01413491740822792, "timestamp": "2025-09-30 22:18:13.926461", "step": 3686, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:13.969493", "step": 3686, "epoch": 2 }, { "type": "loss", "content": 0.009859028272330761, "timestamp": "2025-09-30 22:18:13.983321", "step": 3687, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:14.018550", "step": 3687, "epoch": 2 }, { "type": "loss", "content": 0.014908470213413239, "timestamp": "2025-09-30 22:18:14.047320", "step": 3688, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:14.085736", "step": 3688, "epoch": 3 }, { "type": "loss", "content": 0.032499223947525024, "timestamp": "2025-09-30 22:18:14.091241", "step": 3689, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:14.129379", "step": 3689, "epoch": 3 }, { "type": "loss", "content": 0.0037434506230056286, "timestamp": "2025-09-30 22:18:14.142741", "step": 3690, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:14.177667", "step": 3690, "epoch": 3 }, { "type": "loss", "content": 0.008036208339035511, "timestamp": "2025-09-30 22:18:14.185276", "step": 3691, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:14.218654", "step": 3691, "epoch": 3 }, { "type": "loss", "content": 0.005498153623193502, "timestamp": "2025-09-30 22:18:14.250584", "step": 3692, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:14.289796", "step": 3692, "epoch": 3 }, { "type": "loss", "content": 0.005650009959936142, "timestamp": "2025-09-30 22:18:14.299679", "step": 3693, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:14.339116", "step": 3693, "epoch": 3 }, { "type": "loss", "content": 0.005255770869553089, "timestamp": "2025-09-30 22:18:14.346721", "step": 3694, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:14.382786", "step": 3694, "epoch": 3 }, { "type": "loss", "content": 0.004523404873907566, "timestamp": "2025-09-30 22:18:14.396185", "step": 3695, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:14.433585", "step": 3695, "epoch": 3 }, { "type": "loss", "content": 0.005276726558804512, "timestamp": "2025-09-30 22:18:14.466981", "step": 3696, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:18:14.517339", "step": 3696, "epoch": 3 }, { "type": "loss", "content": 0.0043405890464782715, "timestamp": "2025-09-30 22:18:14.534000", "step": 3697, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:14.567132", "step": 3697, "epoch": 3 }, { "type": "loss", "content": 0.008746081963181496, "timestamp": "2025-09-30 22:18:14.578258", "step": 3698, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:14.615584", "step": 3698, "epoch": 3 }, { "type": "loss", "content": 0.0019219155656173825, "timestamp": "2025-09-30 22:18:14.628935", "step": 3699, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:14.679949", "step": 3699, "epoch": 3 }, { "type": "loss", "content": 0.005548653658479452, "timestamp": "2025-09-30 22:18:14.714160", "step": 3700, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:14.749749", "step": 3700, "epoch": 3 }, { "type": "loss", "content": 0.0061048720963299274, "timestamp": "2025-09-30 22:18:14.754964", "step": 3701, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:14.788440", "step": 3701, "epoch": 3 }, { "type": "loss", "content": 0.007814295589923859, "timestamp": "2025-09-30 22:18:14.796389", "step": 3702, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:14.839871", "step": 3702, "epoch": 3 }, { "type": "loss", "content": 0.009157909080386162, "timestamp": "2025-09-30 22:18:14.850974", "step": 3703, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:14.890618", "step": 3703, "epoch": 3 }, { "type": "loss", "content": 0.013317212462425232, "timestamp": "2025-09-30 22:18:14.924864", "step": 3704, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:14.969274", "step": 3704, "epoch": 3 }, { "type": "loss", "content": 0.012214075773954391, "timestamp": "2025-09-30 22:18:14.977380", "step": 3705, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:15.025620", "step": 3705, "epoch": 3 }, { "type": "loss", "content": 0.005884271580725908, "timestamp": "2025-09-30 22:18:15.038246", "step": 3706, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:15.083113", "step": 3706, "epoch": 3 }, { "type": "loss", "content": 0.010557309724390507, "timestamp": "2025-09-30 22:18:15.090296", "step": 3707, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:15.125585", "step": 3707, "epoch": 3 }, { "type": "loss", "content": 0.0075211371295154095, "timestamp": "2025-09-30 22:18:15.154332", "step": 3708, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:15.193522", "step": 3708, "epoch": 3 }, { "type": "loss", "content": 0.012411229312419891, "timestamp": "2025-09-30 22:18:15.199191", "step": 3709, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:15.232039", "step": 3709, "epoch": 3 }, { "type": "loss", "content": 0.0031475538853555918, "timestamp": "2025-09-30 22:18:15.240086", "step": 3710, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:15.272609", "step": 3710, "epoch": 3 }, { "type": "loss", "content": 0.003913676366209984, "timestamp": "2025-09-30 22:18:15.279503", "step": 3711, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:15.312554", "step": 3711, "epoch": 3 }, { "type": "loss", "content": 0.005341598764061928, "timestamp": "2025-09-30 22:18:15.340442", "step": 3712, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:15.379094", "step": 3712, "epoch": 3 }, { "type": "loss", "content": 0.005785258952528238, "timestamp": "2025-09-30 22:18:15.392206", "step": 3713, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:15.432110", "step": 3713, "epoch": 3 }, { "type": "loss", "content": 0.017216932028532028, "timestamp": "2025-09-30 22:18:15.439323", "step": 3714, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:15.476184", "step": 3714, "epoch": 3 }, { "type": "loss", "content": 0.010194359347224236, "timestamp": "2025-09-30 22:18:15.489908", "step": 3715, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:15.523929", "step": 3715, "epoch": 3 }, { "type": "loss", "content": 0.014519540593028069, "timestamp": "2025-09-30 22:18:15.557325", "step": 3716, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:15.593431", "step": 3716, "epoch": 3 }, { "type": "loss", "content": 0.008453921414911747, "timestamp": "2025-09-30 22:18:15.603977", "step": 3717, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:15.642795", "step": 3717, "epoch": 3 }, { "type": "loss", "content": 0.006131226196885109, "timestamp": "2025-09-30 22:18:15.656203", "step": 3718, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:15.691523", "step": 3718, "epoch": 3 }, { "type": "loss", "content": 0.00950410682708025, "timestamp": "2025-09-30 22:18:15.702563", "step": 3719, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:15.738430", "step": 3719, "epoch": 3 }, { "type": "loss", "content": 0.002914604963734746, "timestamp": "2025-09-30 22:18:15.767307", "step": 3720, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:15.813277", "step": 3720, "epoch": 3 }, { "type": "loss", "content": 0.0038131820037961006, "timestamp": "2025-09-30 22:18:15.826248", "step": 3721, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:15.862374", "step": 3721, "epoch": 3 }, { "type": "loss", "content": 0.005772165954113007, "timestamp": "2025-09-30 22:18:15.873484", "step": 3722, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:15.913366", "step": 3722, "epoch": 3 }, { "type": "loss", "content": 0.012822024524211884, "timestamp": "2025-09-30 22:18:15.927075", "step": 3723, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:15.970664", "step": 3723, "epoch": 3 }, { "type": "loss", "content": 0.012181980535387993, "timestamp": "2025-09-30 22:18:16.005223", "step": 3724, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:16.042346", "step": 3724, "epoch": 3 }, { "type": "loss", "content": 0.003662961069494486, "timestamp": "2025-09-30 22:18:16.048077", "step": 3725, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:16.083318", "step": 3725, "epoch": 3 }, { "type": "loss", "content": 0.0059233191423118114, "timestamp": "2025-09-30 22:18:16.094446", "step": 3726, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:16.130168", "step": 3726, "epoch": 3 }, { "type": "loss", "content": 0.00668327696621418, "timestamp": "2025-09-30 22:18:16.137169", "step": 3727, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:16.177771", "step": 3727, "epoch": 3 }, { "type": "loss", "content": 0.0058019645512104034, "timestamp": "2025-09-30 22:18:16.205353", "step": 3728, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:16.243485", "step": 3728, "epoch": 3 }, { "type": "loss", "content": 0.018236473202705383, "timestamp": "2025-09-30 22:18:16.249032", "step": 3729, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:16.287176", "step": 3729, "epoch": 3 }, { "type": "loss", "content": 0.00995565950870514, "timestamp": "2025-09-30 22:18:16.297575", "step": 3730, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:18:16.349980", "step": 3730, "epoch": 3 }, { "type": "loss", "content": 0.005770450923591852, "timestamp": "2025-09-30 22:18:16.367682", "step": 3731, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:16.414736", "step": 3731, "epoch": 3 }, { "type": "loss", "content": 0.006130880210548639, "timestamp": "2025-09-30 22:18:16.449245", "step": 3732, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:16.487474", "step": 3732, "epoch": 3 }, { "type": "loss", "content": 0.005417075008153915, "timestamp": "2025-09-30 22:18:16.502927", "step": 3733, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:16.535994", "step": 3733, "epoch": 3 }, { "type": "loss", "content": 0.010411316528916359, "timestamp": "2025-09-30 22:18:16.548101", "step": 3734, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:16.583282", "step": 3734, "epoch": 3 }, { "type": "loss", "content": 0.008753958158195019, "timestamp": "2025-09-30 22:18:16.591092", "step": 3735, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:16.666646", "step": 3735, "epoch": 3 }, { "type": "loss", "content": 0.008556557819247246, "timestamp": "2025-09-30 22:18:16.698704", "step": 3736, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:16.753508", "step": 3736, "epoch": 3 }, { "type": "loss", "content": 0.009406271390616894, "timestamp": "2025-09-30 22:18:16.759246", "step": 3737, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:16.792796", "step": 3737, "epoch": 3 }, { "type": "loss", "content": 0.016798771917819977, "timestamp": "2025-09-30 22:18:16.803919", "step": 3738, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:16.845608", "step": 3738, "epoch": 3 }, { "type": "loss", "content": 0.004465001169592142, "timestamp": "2025-09-30 22:18:16.853180", "step": 3739, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:16.903296", "step": 3739, "epoch": 3 }, { "type": "loss", "content": 0.004485375713557005, "timestamp": "2025-09-30 22:18:16.937502", "step": 3740, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:16.972287", "step": 3740, "epoch": 3 }, { "type": "loss", "content": 0.007075873203575611, "timestamp": "2025-09-30 22:18:16.985382", "step": 3741, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:17.033112", "step": 3741, "epoch": 3 }, { "type": "loss", "content": 0.008225277997553349, "timestamp": "2025-09-30 22:18:17.043940", "step": 3742, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:17.077473", "step": 3742, "epoch": 3 }, { "type": "loss", "content": 0.0036486752796918154, "timestamp": "2025-09-30 22:18:17.088440", "step": 3743, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:17.124654", "step": 3743, "epoch": 3 }, { "type": "loss", "content": 0.0066786594688892365, "timestamp": "2025-09-30 22:18:17.157610", "step": 3744, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:17.195367", "step": 3744, "epoch": 3 }, { "type": "loss", "content": 0.006404911633580923, "timestamp": "2025-09-30 22:18:17.206069", "step": 3745, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:17.247997", "step": 3745, "epoch": 3 }, { "type": "loss", "content": 0.007119921967387199, "timestamp": "2025-09-30 22:18:17.259023", "step": 3746, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:17.293777", "step": 3746, "epoch": 3 }, { "type": "loss", "content": 0.005634994246065617, "timestamp": "2025-09-30 22:18:17.300905", "step": 3747, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:17.338543", "step": 3747, "epoch": 3 }, { "type": "loss", "content": 0.013412128202617168, "timestamp": "2025-09-30 22:18:17.366620", "step": 3748, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:17.404523", "step": 3748, "epoch": 3 }, { "type": "loss", "content": 0.0031789415515959263, "timestamp": "2025-09-30 22:18:17.411273", "step": 3749, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:17.448584", "step": 3749, "epoch": 3 }, { "type": "loss", "content": 0.005689022596925497, "timestamp": "2025-09-30 22:18:17.457660", "step": 3750, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:17.498445", "step": 3750, "epoch": 3 }, { "type": "loss", "content": 0.009960222989320755, "timestamp": "2025-09-30 22:18:17.510824", "step": 3751, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:17.549958", "step": 3751, "epoch": 3 }, { "type": "loss", "content": 0.007598586846143007, "timestamp": "2025-09-30 22:18:17.584605", "step": 3752, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:17.624193", "step": 3752, "epoch": 3 }, { "type": "loss", "content": 0.007153657730668783, "timestamp": "2025-09-30 22:18:17.637518", "step": 3753, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:17.675946", "step": 3753, "epoch": 3 }, { "type": "loss", "content": 0.007242599036544561, "timestamp": "2025-09-30 22:18:17.683228", "step": 3754, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:17.717440", "step": 3754, "epoch": 3 }, { "type": "loss", "content": 0.012119963765144348, "timestamp": "2025-09-30 22:18:17.729662", "step": 3755, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:17.770034", "step": 3755, "epoch": 3 }, { "type": "loss", "content": 0.006791086867451668, "timestamp": "2025-09-30 22:18:17.803276", "step": 3756, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:17.848398", "step": 3756, "epoch": 3 }, { "type": "loss", "content": 0.008207529783248901, "timestamp": "2025-09-30 22:18:17.857735", "step": 3757, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:17.901325", "step": 3757, "epoch": 3 }, { "type": "loss", "content": 0.007791235111653805, "timestamp": "2025-09-30 22:18:17.915167", "step": 3758, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:17.957581", "step": 3758, "epoch": 3 }, { "type": "loss", "content": 0.0068681500852108, "timestamp": "2025-09-30 22:18:17.966773", "step": 3759, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:18.005725", "step": 3759, "epoch": 3 }, { "type": "loss", "content": 0.0025856555439531803, "timestamp": "2025-09-30 22:18:18.038880", "step": 3760, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:18.076082", "step": 3760, "epoch": 3 }, { "type": "loss", "content": 0.008086767978966236, "timestamp": "2025-09-30 22:18:18.084004", "step": 3761, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:18.134728", "step": 3761, "epoch": 3 }, { "type": "loss", "content": 0.004576331470161676, "timestamp": "2025-09-30 22:18:18.147311", "step": 3762, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:18.181583", "step": 3762, "epoch": 3 }, { "type": "loss", "content": 0.005068800412118435, "timestamp": "2025-09-30 22:18:18.192599", "step": 3763, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:18.230750", "step": 3763, "epoch": 3 }, { "type": "loss", "content": 0.006224495824426413, "timestamp": "2025-09-30 22:18:18.259263", "step": 3764, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:18.299343", "step": 3764, "epoch": 3 }, { "type": "loss", "content": 0.005041790194809437, "timestamp": "2025-09-30 22:18:18.309092", "step": 3765, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:18.358404", "step": 3765, "epoch": 3 }, { "type": "loss", "content": 0.004011180251836777, "timestamp": "2025-09-30 22:18:18.371021", "step": 3766, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:18.411470", "step": 3766, "epoch": 3 }, { "type": "loss", "content": 0.012970248237252235, "timestamp": "2025-09-30 22:18:18.422444", "step": 3767, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:18.458086", "step": 3767, "epoch": 3 }, { "type": "loss", "content": 0.010750544257462025, "timestamp": "2025-09-30 22:18:18.489510", "step": 3768, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:18.522637", "step": 3768, "epoch": 3 }, { "type": "loss", "content": 0.011017782613635063, "timestamp": "2025-09-30 22:18:18.531500", "step": 3769, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:18.565151", "step": 3769, "epoch": 3 }, { "type": "loss", "content": 0.003940001130104065, "timestamp": "2025-09-30 22:18:18.575387", "step": 3770, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:18.614676", "step": 3770, "epoch": 3 }, { "type": "loss", "content": 0.0042926715686917305, "timestamp": "2025-09-30 22:18:18.628008", "step": 3771, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:18:18.675700", "step": 3771, "epoch": 3 }, { "type": "loss", "content": 0.0032875791657716036, "timestamp": "2025-09-30 22:18:18.712741", "step": 3772, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:18.751794", "step": 3772, "epoch": 3 }, { "type": "loss", "content": 0.00762244313955307, "timestamp": "2025-09-30 22:18:18.764132", "step": 3773, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:18.810163", "step": 3773, "epoch": 3 }, { "type": "loss", "content": 0.006488482002168894, "timestamp": "2025-09-30 22:18:18.824042", "step": 3774, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:18.866935", "step": 3774, "epoch": 3 }, { "type": "loss", "content": 0.002553711412474513, "timestamp": "2025-09-30 22:18:18.880657", "step": 3775, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:18.928045", "step": 3775, "epoch": 3 }, { "type": "loss", "content": 0.012262634001672268, "timestamp": "2025-09-30 22:18:18.962252", "step": 3776, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:19.004165", "step": 3776, "epoch": 3 }, { "type": "loss", "content": 0.00850253738462925, "timestamp": "2025-09-30 22:18:19.017355", "step": 3777, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:19.053743", "step": 3777, "epoch": 3 }, { "type": "loss", "content": 0.00532106775790453, "timestamp": "2025-09-30 22:18:19.060620", "step": 3778, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:19.096076", "step": 3778, "epoch": 3 }, { "type": "loss", "content": 0.004747466649860144, "timestamp": "2025-09-30 22:18:19.109663", "step": 3779, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:19.149284", "step": 3779, "epoch": 3 }, { "type": "loss", "content": 0.008793232031166553, "timestamp": "2025-09-30 22:18:19.178008", "step": 3780, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:19.222410", "step": 3780, "epoch": 3 }, { "type": "loss", "content": 0.013200669549405575, "timestamp": "2025-09-30 22:18:19.230390", "step": 3781, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:19.276699", "step": 3781, "epoch": 3 }, { "type": "loss", "content": 0.008499288000166416, "timestamp": "2025-09-30 22:18:19.287765", "step": 3782, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:19.324157", "step": 3782, "epoch": 3 }, { "type": "loss", "content": 0.008562013506889343, "timestamp": "2025-09-30 22:18:19.337912", "step": 3783, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:19.378701", "step": 3783, "epoch": 3 }, { "type": "loss", "content": 0.0022412212565541267, "timestamp": "2025-09-30 22:18:19.411851", "step": 3784, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:19.453428", "step": 3784, "epoch": 3 }, { "type": "loss", "content": 0.018065424636006355, "timestamp": "2025-09-30 22:18:19.463669", "step": 3785, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:19.504192", "step": 3785, "epoch": 3 }, { "type": "loss", "content": 0.0039479536935687065, "timestamp": "2025-09-30 22:18:19.516736", "step": 3786, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:19.561715", "step": 3786, "epoch": 3 }, { "type": "loss", "content": 0.009036360308527946, "timestamp": "2025-09-30 22:18:19.575422", "step": 3787, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:19.616151", "step": 3787, "epoch": 3 }, { "type": "loss", "content": 0.01659063808619976, "timestamp": "2025-09-30 22:18:19.650346", "step": 3788, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:19.701756", "step": 3788, "epoch": 3 }, { "type": "loss", "content": 0.0018625481752678752, "timestamp": "2025-09-30 22:18:19.714789", "step": 3789, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:19.760397", "step": 3789, "epoch": 3 }, { "type": "loss", "content": 0.0016890355618670583, "timestamp": "2025-09-30 22:18:19.772752", "step": 3790, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:19.805736", "step": 3790, "epoch": 3 }, { "type": "loss", "content": 0.0023757508024573326, "timestamp": "2025-09-30 22:18:19.817934", "step": 3791, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:19.856916", "step": 3791, "epoch": 3 }, { "type": "loss", "content": 0.002403699792921543, "timestamp": "2025-09-30 22:18:19.885237", "step": 3792, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:19.929851", "step": 3792, "epoch": 3 }, { "type": "loss", "content": 0.0006492491811513901, "timestamp": "2025-09-30 22:18:19.937803", "step": 3793, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:19.977718", "step": 3793, "epoch": 3 }, { "type": "loss", "content": 0.0023878298234194517, "timestamp": "2025-09-30 22:18:19.991548", "step": 3794, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:20.031931", "step": 3794, "epoch": 3 }, { "type": "loss", "content": 0.0011837700149044394, "timestamp": "2025-09-30 22:18:20.044324", "step": 3795, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:18:22.618775", "step": 3795, "epoch": 3 }, { "type": "pplx", "content": 5.984622815359175, "timestamp": "2025-09-30 22:18:22.627025", "step": 3795, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:22.663615", "step": 3795, "epoch": 3 }, { "type": "loss", "content": 0.005366284865885973, "timestamp": "2025-09-30 22:18:22.694570", "step": 3796, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:22.732899", "step": 3796, "epoch": 3 }, { "type": "loss", "content": 0.0032648183405399323, "timestamp": "2025-09-30 22:18:22.741131", "step": 3797, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:22.781169", "step": 3797, "epoch": 3 }, { "type": "loss", "content": 0.018629658967256546, "timestamp": "2025-09-30 22:18:22.793708", "step": 3798, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:18:22.846167", "step": 3798, "epoch": 3 }, { "type": "loss", "content": 0.006669124588370323, "timestamp": "2025-09-30 22:18:22.862375", "step": 3799, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:22.909373", "step": 3799, "epoch": 3 }, { "type": "loss", "content": 0.009376229718327522, "timestamp": "2025-09-30 22:18:22.942558", "step": 3800, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:22.982068", "step": 3800, "epoch": 3 }, { "type": "loss", "content": 0.0030857212841510773, "timestamp": "2025-09-30 22:18:22.995425", "step": 3801, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:23.031864", "step": 3801, "epoch": 3 }, { "type": "loss", "content": 0.019586985930800438, "timestamp": "2025-09-30 22:18:23.040066", "step": 3802, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:23.078380", "step": 3802, "epoch": 3 }, { "type": "loss", "content": 0.0083575788885355, "timestamp": "2025-09-30 22:18:23.085212", "step": 3803, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:23.137035", "step": 3803, "epoch": 3 }, { "type": "loss", "content": 0.0033423728309571743, "timestamp": "2025-09-30 22:18:23.173728", "step": 3804, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:23.231733", "step": 3804, "epoch": 3 }, { "type": "loss", "content": 0.004513347055763006, "timestamp": "2025-09-30 22:18:23.239850", "step": 3805, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:23.279003", "step": 3805, "epoch": 3 }, { "type": "loss", "content": 0.008332960307598114, "timestamp": "2025-09-30 22:18:23.291274", "step": 3806, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:23.339024", "step": 3806, "epoch": 3 }, { "type": "loss", "content": 0.00962772872298956, "timestamp": "2025-09-30 22:18:23.350185", "step": 3807, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:23.394310", "step": 3807, "epoch": 3 }, { "type": "loss", "content": 0.004567010793834925, "timestamp": "2025-09-30 22:18:23.428979", "step": 3808, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:23.472171", "step": 3808, "epoch": 3 }, { "type": "loss", "content": 0.004992264788597822, "timestamp": "2025-09-30 22:18:23.485372", "step": 3809, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:23.531957", "step": 3809, "epoch": 3 }, { "type": "loss", "content": 0.006169665139168501, "timestamp": "2025-09-30 22:18:23.544523", "step": 3810, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:23.583335", "step": 3810, "epoch": 3 }, { "type": "loss", "content": 0.00960304494947195, "timestamp": "2025-09-30 22:18:23.597131", "step": 3811, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:23.644699", "step": 3811, "epoch": 3 }, { "type": "loss", "content": 0.004652353469282389, "timestamp": "2025-09-30 22:18:23.681314", "step": 3812, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:23.725157", "step": 3812, "epoch": 3 }, { "type": "loss", "content": 0.023862021043896675, "timestamp": "2025-09-30 22:18:23.740582", "step": 3813, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:18:23.789265", "step": 3813, "epoch": 3 }, { "type": "loss", "content": 0.012361971661448479, "timestamp": "2025-09-30 22:18:23.804918", "step": 3814, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:23.842579", "step": 3814, "epoch": 3 }, { "type": "loss", "content": 0.005945376120507717, "timestamp": "2025-09-30 22:18:23.853646", "step": 3815, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 560 ], "flops": 16611393146432 }, "timestamp": "2025-09-30 22:18:23.905098", "step": 3815, "epoch": 3 }, { "type": "loss", "content": 0.010283276438713074, "timestamp": "2025-09-30 22:18:23.945250", "step": 3816, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:18:23.986991", "step": 3816, "epoch": 3 }, { "type": "loss", "content": 0.004800677765160799, "timestamp": "2025-09-30 22:18:24.003662", "step": 3817, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:24.040227", "step": 3817, "epoch": 3 }, { "type": "loss", "content": 0.005037254188209772, "timestamp": "2025-09-30 22:18:24.054232", "step": 3818, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:24.089345", "step": 3818, "epoch": 3 }, { "type": "loss", "content": 0.005796071607619524, "timestamp": "2025-09-30 22:18:24.101490", "step": 3819, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:24.139434", "step": 3819, "epoch": 3 }, { "type": "loss", "content": 0.008076971396803856, "timestamp": "2025-09-30 22:18:24.173675", "step": 3820, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:18:24.216698", "step": 3820, "epoch": 3 }, { "type": "loss", "content": 0.008668972179293633, "timestamp": "2025-09-30 22:18:24.233433", "step": 3821, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-30 22:18:24.278952", "step": 3821, "epoch": 3 }, { "type": "loss", "content": 0.005224195308983326, "timestamp": "2025-09-30 22:18:24.296534", "step": 3822, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:24.339588", "step": 3822, "epoch": 3 }, { "type": "loss", "content": 0.010332217440009117, "timestamp": "2025-09-30 22:18:24.353305", "step": 3823, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:24.393084", "step": 3823, "epoch": 3 }, { "type": "loss", "content": 0.01401583757251501, "timestamp": "2025-09-30 22:18:24.421170", "step": 3824, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:24.460237", "step": 3824, "epoch": 3 }, { "type": "loss", "content": 0.004565891809761524, "timestamp": "2025-09-30 22:18:24.468096", "step": 3825, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:24.505472", "step": 3825, "epoch": 3 }, { "type": "loss", "content": 0.00525381974875927, "timestamp": "2025-09-30 22:18:24.518059", "step": 3826, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:24.553698", "step": 3826, "epoch": 3 }, { "type": "loss", "content": 0.005812001880258322, "timestamp": "2025-09-30 22:18:24.564784", "step": 3827, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:24.596735", "step": 3827, "epoch": 3 }, { "type": "loss", "content": 0.011806738562881947, "timestamp": "2025-09-30 22:18:24.625496", "step": 3828, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:24.662898", "step": 3828, "epoch": 3 }, { "type": "loss", "content": 0.004773608408868313, "timestamp": "2025-09-30 22:18:24.668565", "step": 3829, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:24.708771", "step": 3829, "epoch": 3 }, { "type": "loss", "content": 0.017275353893637657, "timestamp": "2025-09-30 22:18:24.722097", "step": 3830, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:18:24.767901", "step": 3830, "epoch": 3 }, { "type": "loss", "content": 0.014677359722554684, "timestamp": "2025-09-30 22:18:24.784095", "step": 3831, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:24.815545", "step": 3831, "epoch": 3 }, { "type": "loss", "content": 0.007634055335074663, "timestamp": "2025-09-30 22:18:24.844159", "step": 3832, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:24.877349", "step": 3832, "epoch": 3 }, { "type": "loss", "content": 0.009904236532747746, "timestamp": "2025-09-30 22:18:24.885428", "step": 3833, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:24.923168", "step": 3833, "epoch": 3 }, { "type": "loss", "content": 0.01268276758491993, "timestamp": "2025-09-30 22:18:24.931081", "step": 3834, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:24.963562", "step": 3834, "epoch": 3 }, { "type": "loss", "content": 0.011875173076987267, "timestamp": "2025-09-30 22:18:24.971164", "step": 3835, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:25.015227", "step": 3835, "epoch": 3 }, { "type": "loss", "content": 0.0084492526948452, "timestamp": "2025-09-30 22:18:25.042960", "step": 3836, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:25.079600", "step": 3836, "epoch": 3 }, { "type": "loss", "content": 0.007780611515045166, "timestamp": "2025-09-30 22:18:25.090165", "step": 3837, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:25.127255", "step": 3837, "epoch": 3 }, { "type": "loss", "content": 0.0065346043556928635, "timestamp": "2025-09-30 22:18:25.138471", "step": 3838, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:25.190501", "step": 3838, "epoch": 3 }, { "type": "loss", "content": 0.008517752401530743, "timestamp": "2025-09-30 22:18:25.201696", "step": 3839, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:25.247126", "step": 3839, "epoch": 3 }, { "type": "loss", "content": 0.009599901735782623, "timestamp": "2025-09-30 22:18:25.282003", "step": 3840, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:25.324605", "step": 3840, "epoch": 3 }, { "type": "loss", "content": 0.010341773740947247, "timestamp": "2025-09-30 22:18:25.335371", "step": 3841, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:25.377590", "step": 3841, "epoch": 3 }, { "type": "loss", "content": 0.008768843486905098, "timestamp": "2025-09-30 22:18:25.391455", "step": 3842, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:25.440471", "step": 3842, "epoch": 3 }, { "type": "loss", "content": 0.012737488374114037, "timestamp": "2025-09-30 22:18:25.454142", "step": 3843, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:18:25.518195", "step": 3843, "epoch": 3 }, { "type": "loss", "content": 0.008995888754725456, "timestamp": "2025-09-30 22:18:25.554696", "step": 3844, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:25.587401", "step": 3844, "epoch": 3 }, { "type": "loss", "content": 0.008873233571648598, "timestamp": "2025-09-30 22:18:25.597391", "step": 3845, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:25.653250", "step": 3845, "epoch": 3 }, { "type": "loss", "content": 0.005939814727753401, "timestamp": "2025-09-30 22:18:25.667080", "step": 3846, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:25.703290", "step": 3846, "epoch": 3 }, { "type": "loss", "content": 0.01119601633399725, "timestamp": "2025-09-30 22:18:25.715584", "step": 3847, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:25.754438", "step": 3847, "epoch": 3 }, { "type": "loss", "content": 0.00690952455624938, "timestamp": "2025-09-30 22:18:25.789268", "step": 3848, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:25.824949", "step": 3848, "epoch": 3 }, { "type": "loss", "content": 0.007601768709719181, "timestamp": "2025-09-30 22:18:25.830640", "step": 3849, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:25.866163", "step": 3849, "epoch": 3 }, { "type": "loss", "content": 0.00634763902053237, "timestamp": "2025-09-30 22:18:25.878698", "step": 3850, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:25.920077", "step": 3850, "epoch": 3 }, { "type": "loss", "content": 0.007674683816730976, "timestamp": "2025-09-30 22:18:25.931010", "step": 3851, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:25.967808", "step": 3851, "epoch": 3 }, { "type": "loss", "content": 0.00828044954687357, "timestamp": "2025-09-30 22:18:26.001300", "step": 3852, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:26.044886", "step": 3852, "epoch": 3 }, { "type": "loss", "content": 0.009877484291791916, "timestamp": "2025-09-30 22:18:26.055797", "step": 3853, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:26.093707", "step": 3853, "epoch": 3 }, { "type": "loss", "content": 0.00591434957459569, "timestamp": "2025-09-30 22:18:26.103992", "step": 3854, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:26.140192", "step": 3854, "epoch": 3 }, { "type": "loss", "content": 0.003982429392635822, "timestamp": "2025-09-30 22:18:26.152462", "step": 3855, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:26.196468", "step": 3855, "epoch": 3 }, { "type": "loss", "content": 0.008871220983564854, "timestamp": "2025-09-30 22:18:26.225283", "step": 3856, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:26.260461", "step": 3856, "epoch": 3 }, { "type": "loss", "content": 0.009687988087534904, "timestamp": "2025-09-30 22:18:26.265947", "step": 3857, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:26.301410", "step": 3857, "epoch": 3 }, { "type": "loss", "content": 0.007140415720641613, "timestamp": "2025-09-30 22:18:26.309245", "step": 3858, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:26.350114", "step": 3858, "epoch": 3 }, { "type": "loss", "content": 0.0017469810554757714, "timestamp": "2025-09-30 22:18:26.363714", "step": 3859, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:26.408542", "step": 3859, "epoch": 3 }, { "type": "loss", "content": 0.00501214899122715, "timestamp": "2025-09-30 22:18:26.436960", "step": 3860, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:26.481463", "step": 3860, "epoch": 3 }, { "type": "loss", "content": 0.006503783632069826, "timestamp": "2025-09-30 22:18:26.491532", "step": 3861, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:26.533909", "step": 3861, "epoch": 3 }, { "type": "loss", "content": 0.011045904830098152, "timestamp": "2025-09-30 22:18:26.544219", "step": 3862, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:26.582027", "step": 3862, "epoch": 3 }, { "type": "loss", "content": 0.007116046734154224, "timestamp": "2025-09-30 22:18:26.589410", "step": 3863, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:26.625089", "step": 3863, "epoch": 3 }, { "type": "loss", "content": 0.006851586047559977, "timestamp": "2025-09-30 22:18:26.658113", "step": 3864, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:26.695516", "step": 3864, "epoch": 3 }, { "type": "loss", "content": 0.0008578465203754604, "timestamp": "2025-09-30 22:18:26.708599", "step": 3865, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:26.755117", "step": 3865, "epoch": 3 }, { "type": "loss", "content": 0.006214868277311325, "timestamp": "2025-09-30 22:18:26.763762", "step": 3866, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:18:26.811109", "step": 3866, "epoch": 3 }, { "type": "loss", "content": 0.004790063947439194, "timestamp": "2025-09-30 22:18:26.827288", "step": 3867, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:26.871596", "step": 3867, "epoch": 3 }, { "type": "loss", "content": 0.006224909331649542, "timestamp": "2025-09-30 22:18:26.905803", "step": 3868, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:26.949964", "step": 3868, "epoch": 3 }, { "type": "loss", "content": 0.0054021235555410385, "timestamp": "2025-09-30 22:18:26.960670", "step": 3869, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:26.998448", "step": 3869, "epoch": 3 }, { "type": "loss", "content": 0.007381778676062822, "timestamp": "2025-09-30 22:18:27.009466", "step": 3870, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:27.044310", "step": 3870, "epoch": 3 }, { "type": "loss", "content": 0.012521324679255486, "timestamp": "2025-09-30 22:18:27.052354", "step": 3871, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:27.090489", "step": 3871, "epoch": 3 }, { "type": "loss", "content": 0.007201758679002523, "timestamp": "2025-09-30 22:18:27.123853", "step": 3872, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:18:27.166188", "step": 3872, "epoch": 3 }, { "type": "loss", "content": 0.004940586630254984, "timestamp": "2025-09-30 22:18:27.171494", "step": 3873, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:27.210102", "step": 3873, "epoch": 3 }, { "type": "loss", "content": 0.008473108522593975, "timestamp": "2025-09-30 22:18:27.220601", "step": 3874, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:27.260120", "step": 3874, "epoch": 3 }, { "type": "loss", "content": 0.005420765373855829, "timestamp": "2025-09-30 22:18:27.273873", "step": 3875, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:27.306808", "step": 3875, "epoch": 3 }, { "type": "loss", "content": 0.011063523590564728, "timestamp": "2025-09-30 22:18:27.335230", "step": 3876, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:27.383162", "step": 3876, "epoch": 3 }, { "type": "loss", "content": 0.010778565891087055, "timestamp": "2025-09-30 22:18:27.395787", "step": 3877, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:27.470218", "step": 3877, "epoch": 3 }, { "type": "loss", "content": 0.005143335554748774, "timestamp": "2025-09-30 22:18:27.482789", "step": 3878, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:27.535931", "step": 3878, "epoch": 3 }, { "type": "loss", "content": 0.006342253182083368, "timestamp": "2025-09-30 22:18:27.551835", "step": 3879, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:27.589350", "step": 3879, "epoch": 3 }, { "type": "loss", "content": 0.0035624420270323753, "timestamp": "2025-09-30 22:18:27.622817", "step": 3880, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:27.671436", "step": 3880, "epoch": 3 }, { "type": "loss", "content": 0.010757009498775005, "timestamp": "2025-09-30 22:18:27.676138", "step": 3881, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:27.718742", "step": 3881, "epoch": 3 }, { "type": "loss", "content": 0.0025167877320200205, "timestamp": "2025-09-30 22:18:27.729051", "step": 3882, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:27.773613", "step": 3882, "epoch": 3 }, { "type": "loss", "content": 0.007043273653835058, "timestamp": "2025-09-30 22:18:27.789434", "step": 3883, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:27.829959", "step": 3883, "epoch": 3 }, { "type": "loss", "content": 0.006122028920799494, "timestamp": "2025-09-30 22:18:27.864591", "step": 3884, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:27.902894", "step": 3884, "epoch": 3 }, { "type": "loss", "content": 0.005711785517632961, "timestamp": "2025-09-30 22:18:27.915338", "step": 3885, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:27.964986", "step": 3885, "epoch": 3 }, { "type": "loss", "content": 0.0050542643293738365, "timestamp": "2025-09-30 22:18:27.975391", "step": 3886, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:28.025093", "step": 3886, "epoch": 3 }, { "type": "loss", "content": 0.012293345294892788, "timestamp": "2025-09-30 22:18:28.035476", "step": 3887, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:28.074132", "step": 3887, "epoch": 3 }, { "type": "loss", "content": 0.005285812076181173, "timestamp": "2025-09-30 22:18:28.105291", "step": 3888, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:28.148314", "step": 3888, "epoch": 3 }, { "type": "loss", "content": 0.007423700764775276, "timestamp": "2025-09-30 22:18:28.153994", "step": 3889, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:28.192415", "step": 3889, "epoch": 3 }, { "type": "loss", "content": 0.0060288263484835625, "timestamp": "2025-09-30 22:18:28.205836", "step": 3890, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:28.248177", "step": 3890, "epoch": 3 }, { "type": "loss", "content": 0.004893283825367689, "timestamp": "2025-09-30 22:18:28.261984", "step": 3891, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:28.305167", "step": 3891, "epoch": 3 }, { "type": "loss", "content": 0.0061093950644135475, "timestamp": "2025-09-30 22:18:28.339808", "step": 3892, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:28.379637", "step": 3892, "epoch": 3 }, { "type": "loss", "content": 0.0057939523831009865, "timestamp": "2025-09-30 22:18:28.392243", "step": 3893, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:28.434929", "step": 3893, "epoch": 3 }, { "type": "loss", "content": 0.00592371542006731, "timestamp": "2025-09-30 22:18:28.447321", "step": 3894, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:28.493332", "step": 3894, "epoch": 3 }, { "type": "loss", "content": 0.0073332227766513824, "timestamp": "2025-09-30 22:18:28.506723", "step": 3895, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:28.560900", "step": 3895, "epoch": 3 }, { "type": "loss", "content": 0.009659533388912678, "timestamp": "2025-09-30 22:18:28.595542", "step": 3896, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:28.659981", "step": 3896, "epoch": 3 }, { "type": "loss", "content": 0.011878445744514465, "timestamp": "2025-09-30 22:18:28.668157", "step": 3897, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:28.713444", "step": 3897, "epoch": 3 }, { "type": "loss", "content": 0.004296146798878908, "timestamp": "2025-09-30 22:18:28.727126", "step": 3898, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:28.770444", "step": 3898, "epoch": 3 }, { "type": "loss", "content": 0.005357000045478344, "timestamp": "2025-09-30 22:18:28.778436", "step": 3899, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:28.815814", "step": 3899, "epoch": 3 }, { "type": "loss", "content": 0.007863102480769157, "timestamp": "2025-09-30 22:18:28.850118", "step": 3900, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:28.901235", "step": 3900, "epoch": 3 }, { "type": "loss", "content": 0.008372662588953972, "timestamp": "2025-09-30 22:18:28.911357", "step": 3901, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:28.952095", "step": 3901, "epoch": 3 }, { "type": "loss", "content": 0.007149435579776764, "timestamp": "2025-09-30 22:18:28.963217", "step": 3902, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:29.001344", "step": 3902, "epoch": 3 }, { "type": "loss", "content": 0.007008867803961039, "timestamp": "2025-09-30 22:18:29.013604", "step": 3903, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:29.056577", "step": 3903, "epoch": 3 }, { "type": "loss", "content": 0.006546663120388985, "timestamp": "2025-09-30 22:18:29.091170", "step": 3904, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:29.141184", "step": 3904, "epoch": 3 }, { "type": "loss", "content": 0.004806919023394585, "timestamp": "2025-09-30 22:18:29.153898", "step": 3905, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:29.192991", "step": 3905, "epoch": 3 }, { "type": "loss", "content": 0.007764711976051331, "timestamp": "2025-09-30 22:18:29.205574", "step": 3906, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:29.262414", "step": 3906, "epoch": 3 }, { "type": "loss", "content": 0.006050893571227789, "timestamp": "2025-09-30 22:18:29.275799", "step": 3907, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:29.319053", "step": 3907, "epoch": 3 }, { "type": "loss", "content": 0.005481482483446598, "timestamp": "2025-09-30 22:18:29.352270", "step": 3908, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:29.415848", "step": 3908, "epoch": 3 }, { "type": "loss", "content": 0.008061875589191914, "timestamp": "2025-09-30 22:18:29.428530", "step": 3909, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:29.471366", "step": 3909, "epoch": 3 }, { "type": "loss", "content": 0.004771022126078606, "timestamp": "2025-09-30 22:18:29.478610", "step": 3910, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:18:32.259037", "step": 3910, "epoch": 3 }, { "type": "pplx", "content": 5.814706217503361, "timestamp": "2025-09-30 22:18:32.261672", "step": 3910, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:32.294648", "step": 3910, "epoch": 3 }, { "type": "loss", "content": 0.0073614949360489845, "timestamp": "2025-09-30 22:18:32.306895", "step": 3911, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:32.354228", "step": 3911, "epoch": 3 }, { "type": "loss", "content": 0.008830001577734947, "timestamp": "2025-09-30 22:18:32.383121", "step": 3912, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:32.444790", "step": 3912, "epoch": 3 }, { "type": "loss", "content": 0.009699201211333275, "timestamp": "2025-09-30 22:18:32.455253", "step": 3913, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:32.491250", "step": 3913, "epoch": 3 }, { "type": "loss", "content": 0.007017786148935556, "timestamp": "2025-09-30 22:18:32.502320", "step": 3914, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:32.568903", "step": 3914, "epoch": 3 }, { "type": "loss", "content": 0.009106282144784927, "timestamp": "2025-09-30 22:18:32.576467", "step": 3915, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:18:32.632651", "step": 3915, "epoch": 3 }, { "type": "loss", "content": 0.0056086317636072636, "timestamp": "2025-09-30 22:18:32.670588", "step": 3916, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:32.710909", "step": 3916, "epoch": 3 }, { "type": "loss", "content": 0.004006392788141966, "timestamp": "2025-09-30 22:18:32.723547", "step": 3917, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:32.765443", "step": 3917, "epoch": 3 }, { "type": "loss", "content": 0.010423400439321995, "timestamp": "2025-09-30 22:18:32.777771", "step": 3918, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:32.838754", "step": 3918, "epoch": 3 }, { "type": "loss", "content": 0.00975536648184061, "timestamp": "2025-09-30 22:18:32.846598", "step": 3919, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:32.884965", "step": 3919, "epoch": 3 }, { "type": "loss", "content": 0.007408326957374811, "timestamp": "2025-09-30 22:18:32.918150", "step": 3920, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:32.962424", "step": 3920, "epoch": 3 }, { "type": "loss", "content": 0.012103984132409096, "timestamp": "2025-09-30 22:18:32.970610", "step": 3921, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:33.011097", "step": 3921, "epoch": 3 }, { "type": "loss", "content": 0.0038203338626772165, "timestamp": "2025-09-30 22:18:33.023633", "step": 3922, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:33.059940", "step": 3922, "epoch": 3 }, { "type": "loss", "content": 0.002586872549727559, "timestamp": "2025-09-30 22:18:33.067903", "step": 3923, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:33.106161", "step": 3923, "epoch": 3 }, { "type": "loss", "content": 0.0026292195543646812, "timestamp": "2025-09-30 22:18:33.140392", "step": 3924, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:33.199633", "step": 3924, "epoch": 3 }, { "type": "loss", "content": 0.011017059907317162, "timestamp": "2025-09-30 22:18:33.208410", "step": 3925, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:33.256450", "step": 3925, "epoch": 3 }, { "type": "loss", "content": 0.006252105347812176, "timestamp": "2025-09-30 22:18:33.268726", "step": 3926, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:33.312775", "step": 3926, "epoch": 3 }, { "type": "loss", "content": 0.007171166129410267, "timestamp": "2025-09-30 22:18:33.325119", "step": 3927, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:33.363839", "step": 3927, "epoch": 3 }, { "type": "loss", "content": 0.007750141900032759, "timestamp": "2025-09-30 22:18:33.398092", "step": 3928, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:33.438349", "step": 3928, "epoch": 3 }, { "type": "loss", "content": 0.013963129371404648, "timestamp": "2025-09-30 22:18:33.451391", "step": 3929, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:33.495607", "step": 3929, "epoch": 3 }, { "type": "loss", "content": 0.008714791387319565, "timestamp": "2025-09-30 22:18:33.506774", "step": 3930, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:33.543823", "step": 3930, "epoch": 3 }, { "type": "loss", "content": 0.034263599663972855, "timestamp": "2025-09-30 22:18:33.554882", "step": 3931, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:33.605646", "step": 3931, "epoch": 3 }, { "type": "loss", "content": 0.005537922028452158, "timestamp": "2025-09-30 22:18:33.640301", "step": 3932, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:33.678271", "step": 3932, "epoch": 3 }, { "type": "loss", "content": 0.01095846202224493, "timestamp": "2025-09-30 22:18:33.691235", "step": 3933, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:18:33.736791", "step": 3933, "epoch": 3 }, { "type": "loss", "content": 0.0030966904014348984, "timestamp": "2025-09-30 22:18:33.752451", "step": 3934, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:33.800220", "step": 3934, "epoch": 3 }, { "type": "loss", "content": 0.002290707314386964, "timestamp": "2025-09-30 22:18:33.813574", "step": 3935, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:33.858852", "step": 3935, "epoch": 3 }, { "type": "loss", "content": 0.00609651068225503, "timestamp": "2025-09-30 22:18:33.892243", "step": 3936, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:33.954454", "step": 3936, "epoch": 3 }, { "type": "loss", "content": 0.00569250900298357, "timestamp": "2025-09-30 22:18:33.967821", "step": 3937, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:34.012184", "step": 3937, "epoch": 3 }, { "type": "loss", "content": 0.013253239914774895, "timestamp": "2025-09-30 22:18:34.028048", "step": 3938, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:34.076862", "step": 3938, "epoch": 3 }, { "type": "loss", "content": 0.0021241006907075644, "timestamp": "2025-09-30 22:18:34.090674", "step": 3939, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:34.167622", "step": 3939, "epoch": 3 }, { "type": "loss", "content": 0.0024524808395653963, "timestamp": "2025-09-30 22:18:34.204414", "step": 3940, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:34.255718", "step": 3940, "epoch": 3 }, { "type": "loss", "content": 0.007233856245875359, "timestamp": "2025-09-30 22:18:34.265573", "step": 3941, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:34.323678", "step": 3941, "epoch": 3 }, { "type": "loss", "content": 0.012206872925162315, "timestamp": "2025-09-30 22:18:34.336997", "step": 3942, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:34.383903", "step": 3942, "epoch": 3 }, { "type": "loss", "content": 0.007985597476363182, "timestamp": "2025-09-30 22:18:34.397627", "step": 3943, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:34.434976", "step": 3943, "epoch": 3 }, { "type": "loss", "content": 0.008505703881382942, "timestamp": "2025-09-30 22:18:34.469477", "step": 3944, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:34.521002", "step": 3944, "epoch": 3 }, { "type": "loss", "content": 0.008168045431375504, "timestamp": "2025-09-30 22:18:34.534212", "step": 3945, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:34.588227", "step": 3945, "epoch": 3 }, { "type": "loss", "content": 0.004361226689070463, "timestamp": "2025-09-30 22:18:34.600636", "step": 3946, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:34.647458", "step": 3946, "epoch": 3 }, { "type": "loss", "content": 0.012169808149337769, "timestamp": "2025-09-30 22:18:34.660867", "step": 3947, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:34.705853", "step": 3947, "epoch": 3 }, { "type": "loss", "content": 0.006339035928249359, "timestamp": "2025-09-30 22:18:34.740478", "step": 3948, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:18:34.792364", "step": 3948, "epoch": 3 }, { "type": "loss", "content": 0.004964579362422228, "timestamp": "2025-09-30 22:18:34.809380", "step": 3949, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:34.870754", "step": 3949, "epoch": 3 }, { "type": "loss", "content": 0.0011956646339967847, "timestamp": "2025-09-30 22:18:34.884161", "step": 3950, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:34.919046", "step": 3950, "epoch": 3 }, { "type": "loss", "content": 0.00508431950584054, "timestamp": "2025-09-30 22:18:34.926944", "step": 3951, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:34.982616", "step": 3951, "epoch": 3 }, { "type": "loss", "content": 0.007752721197903156, "timestamp": "2025-09-30 22:18:35.013877", "step": 3952, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:35.053195", "step": 3952, "epoch": 3 }, { "type": "loss", "content": 0.024671725928783417, "timestamp": "2025-09-30 22:18:35.058838", "step": 3953, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:35.118430", "step": 3953, "epoch": 3 }, { "type": "loss", "content": 0.004845732357352972, "timestamp": "2025-09-30 22:18:35.130985", "step": 3954, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:35.191301", "step": 3954, "epoch": 3 }, { "type": "loss", "content": 0.006331304553896189, "timestamp": "2025-09-30 22:18:35.204688", "step": 3955, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:35.246378", "step": 3955, "epoch": 3 }, { "type": "loss", "content": 0.0057160560972988605, "timestamp": "2025-09-30 22:18:35.280800", "step": 3956, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:35.321808", "step": 3956, "epoch": 3 }, { "type": "loss", "content": 0.0051583037711679935, "timestamp": "2025-09-30 22:18:35.332309", "step": 3957, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:35.379711", "step": 3957, "epoch": 3 }, { "type": "loss", "content": 0.006589068099856377, "timestamp": "2025-09-30 22:18:35.393015", "step": 3958, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:18:35.442600", "step": 3958, "epoch": 3 }, { "type": "loss", "content": 0.00613076938316226, "timestamp": "2025-09-30 22:18:35.458904", "step": 3959, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:35.511880", "step": 3959, "epoch": 3 }, { "type": "loss", "content": 0.003702125744894147, "timestamp": "2025-09-30 22:18:35.548579", "step": 3960, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:35.607235", "step": 3960, "epoch": 3 }, { "type": "loss", "content": 0.0032431448344141245, "timestamp": "2025-09-30 22:18:35.620619", "step": 3961, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:35.671986", "step": 3961, "epoch": 3 }, { "type": "loss", "content": 0.00313235679641366, "timestamp": "2025-09-30 22:18:35.682535", "step": 3962, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:18:35.734631", "step": 3962, "epoch": 3 }, { "type": "loss", "content": 0.003487497800961137, "timestamp": "2025-09-30 22:18:35.750797", "step": 3963, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:35.795430", "step": 3963, "epoch": 3 }, { "type": "loss", "content": 0.003668895922601223, "timestamp": "2025-09-30 22:18:35.830039", "step": 3964, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:35.864463", "step": 3964, "epoch": 3 }, { "type": "loss", "content": 0.0067418343387544155, "timestamp": "2025-09-30 22:18:35.874247", "step": 3965, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:35.926358", "step": 3965, "epoch": 3 }, { "type": "loss", "content": 0.0065846871584653854, "timestamp": "2025-09-30 22:18:35.934000", "step": 3966, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:35.967325", "step": 3966, "epoch": 3 }, { "type": "loss", "content": 0.0036082533188164234, "timestamp": "2025-09-30 22:18:35.977750", "step": 3967, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:36.014897", "step": 3967, "epoch": 3 }, { "type": "loss", "content": 0.004577101673930883, "timestamp": "2025-09-30 22:18:36.046115", "step": 3968, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:36.092447", "step": 3968, "epoch": 3 }, { "type": "loss", "content": 0.012322023510932922, "timestamp": "2025-09-30 22:18:36.105592", "step": 3969, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:36.141831", "step": 3969, "epoch": 3 }, { "type": "loss", "content": 0.002453200053423643, "timestamp": "2025-09-30 22:18:36.152026", "step": 3970, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:36.205457", "step": 3970, "epoch": 3 }, { "type": "loss", "content": 0.007268570829182863, "timestamp": "2025-09-30 22:18:36.219466", "step": 3971, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:36.258169", "step": 3971, "epoch": 3 }, { "type": "loss", "content": 0.007819430902600288, "timestamp": "2025-09-30 22:18:36.291295", "step": 3972, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:18:36.335473", "step": 3972, "epoch": 3 }, { "type": "loss", "content": 0.007106750272214413, "timestamp": "2025-09-30 22:18:36.350616", "step": 3973, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:36.396747", "step": 3973, "epoch": 3 }, { "type": "loss", "content": 0.010787696577608585, "timestamp": "2025-09-30 22:18:36.412628", "step": 3974, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:36.458589", "step": 3974, "epoch": 3 }, { "type": "loss", "content": 0.005603353027254343, "timestamp": "2025-09-30 22:18:36.469670", "step": 3975, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:36.520178", "step": 3975, "epoch": 3 }, { "type": "loss", "content": 0.004301795735955238, "timestamp": "2025-09-30 22:18:36.551225", "step": 3976, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:36.587429", "step": 3976, "epoch": 3 }, { "type": "loss", "content": 0.004883557092398405, "timestamp": "2025-09-30 22:18:36.596163", "step": 3977, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:36.644774", "step": 3977, "epoch": 3 }, { "type": "loss", "content": 0.006246346514672041, "timestamp": "2025-09-30 22:18:36.658201", "step": 3978, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:36.709559", "step": 3978, "epoch": 3 }, { "type": "loss", "content": 0.00462919007986784, "timestamp": "2025-09-30 22:18:36.722979", "step": 3979, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:36.769480", "step": 3979, "epoch": 3 }, { "type": "loss", "content": 0.008667691610753536, "timestamp": "2025-09-30 22:18:36.802693", "step": 3980, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:36.848405", "step": 3980, "epoch": 3 }, { "type": "loss", "content": 0.005111383739858866, "timestamp": "2025-09-30 22:18:36.857147", "step": 3981, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:36.899671", "step": 3981, "epoch": 3 }, { "type": "loss", "content": 0.0035054711624979973, "timestamp": "2025-09-30 22:18:36.911977", "step": 3982, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:36.957184", "step": 3982, "epoch": 3 }, { "type": "loss", "content": 0.0011335327289998531, "timestamp": "2025-09-30 22:18:36.964399", "step": 3983, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:37.016493", "step": 3983, "epoch": 3 }, { "type": "loss", "content": 0.004883974324911833, "timestamp": "2025-09-30 22:18:37.053224", "step": 3984, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:37.089444", "step": 3984, "epoch": 3 }, { "type": "loss", "content": 0.009602776728570461, "timestamp": "2025-09-30 22:18:37.095338", "step": 3985, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:37.141870", "step": 3985, "epoch": 3 }, { "type": "loss", "content": 0.014472908340394497, "timestamp": "2025-09-30 22:18:37.148915", "step": 3986, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:37.184512", "step": 3986, "epoch": 3 }, { "type": "loss", "content": 0.002878964878618717, "timestamp": "2025-09-30 22:18:37.194883", "step": 3987, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:37.240943", "step": 3987, "epoch": 3 }, { "type": "loss", "content": 0.007471582852303982, "timestamp": "2025-09-30 22:18:37.272809", "step": 3988, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:37.312852", "step": 3988, "epoch": 3 }, { "type": "loss", "content": 0.011246595531702042, "timestamp": "2025-09-30 22:18:37.326152", "step": 3989, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:37.361723", "step": 3989, "epoch": 3 }, { "type": "loss", "content": 0.004211165476590395, "timestamp": "2025-09-30 22:18:37.369749", "step": 3990, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:37.407055", "step": 3990, "epoch": 3 }, { "type": "loss", "content": 0.00475333584472537, "timestamp": "2025-09-30 22:18:37.414272", "step": 3991, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:37.454266", "step": 3991, "epoch": 3 }, { "type": "loss", "content": 0.006536404136568308, "timestamp": "2025-09-30 22:18:37.486217", "step": 3992, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:37.531183", "step": 3992, "epoch": 3 }, { "type": "loss", "content": 0.004319012630730867, "timestamp": "2025-09-30 22:18:37.543767", "step": 3993, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:37.578660", "step": 3993, "epoch": 3 }, { "type": "loss", "content": 0.004011413082480431, "timestamp": "2025-09-30 22:18:37.586415", "step": 3994, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:37.623451", "step": 3994, "epoch": 3 }, { "type": "loss", "content": 0.007326128892600536, "timestamp": "2025-09-30 22:18:37.631307", "step": 3995, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:37.672911", "step": 3995, "epoch": 3 }, { "type": "loss", "content": 0.007306681480258703, "timestamp": "2025-09-30 22:18:37.707546", "step": 3996, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:37.760139", "step": 3996, "epoch": 3 }, { "type": "loss", "content": 0.006555972620844841, "timestamp": "2025-09-30 22:18:37.775552", "step": 3997, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:18:37.835272", "step": 3997, "epoch": 3 }, { "type": "loss", "content": 0.007153916638344526, "timestamp": "2025-09-30 22:18:37.852431", "step": 3998, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:37.905432", "step": 3998, "epoch": 3 }, { "type": "loss", "content": 0.007474151905626059, "timestamp": "2025-09-30 22:18:37.919245", "step": 3999, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:37.961319", "step": 3999, "epoch": 3 }, { "type": "loss", "content": 0.004183096345514059, "timestamp": "2025-09-30 22:18:37.996197", "step": 4000, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 4000", "timestamp": "2025-09-30 22:18:43.644436", "step": 4000, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:43.694862", "step": 4000, "epoch": 3 }, { "type": "loss", "content": 0.009015398100018501, "timestamp": "2025-09-30 22:18:43.701075", "step": 4001, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:43.740755", "step": 4001, "epoch": 3 }, { "type": "loss", "content": 0.011754374019801617, "timestamp": "2025-09-30 22:18:43.754477", "step": 4002, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:43.791366", "step": 4002, "epoch": 3 }, { "type": "loss", "content": 0.006731737405061722, "timestamp": "2025-09-30 22:18:43.803097", "step": 4003, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:43.855445", "step": 4003, "epoch": 3 }, { "type": "loss", "content": 0.0032603219151496887, "timestamp": "2025-09-30 22:18:43.889653", "step": 4004, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:43.942019", "step": 4004, "epoch": 3 }, { "type": "loss", "content": 0.004216242115944624, "timestamp": "2025-09-30 22:18:43.955376", "step": 4005, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:44.001833", "step": 4005, "epoch": 3 }, { "type": "loss", "content": 0.006579623557627201, "timestamp": "2025-09-30 22:18:44.015234", "step": 4006, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:44.059013", "step": 4006, "epoch": 3 }, { "type": "loss", "content": 0.007562713697552681, "timestamp": "2025-09-30 22:18:44.071626", "step": 4007, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:44.109601", "step": 4007, "epoch": 3 }, { "type": "loss", "content": 0.005524170119315386, "timestamp": "2025-09-30 22:18:44.144261", "step": 4008, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:44.184357", "step": 4008, "epoch": 3 }, { "type": "loss", "content": 0.00575696025043726, "timestamp": "2025-09-30 22:18:44.192374", "step": 4009, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:44.237343", "step": 4009, "epoch": 3 }, { "type": "loss", "content": 0.004735125228762627, "timestamp": "2025-09-30 22:18:44.249936", "step": 4010, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:44.290493", "step": 4010, "epoch": 3 }, { "type": "loss", "content": 0.00848841480910778, "timestamp": "2025-09-30 22:18:44.297656", "step": 4011, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:44.336032", "step": 4011, "epoch": 3 }, { "type": "loss", "content": 0.005274408962577581, "timestamp": "2025-09-30 22:18:44.364182", "step": 4012, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:44.410082", "step": 4012, "epoch": 3 }, { "type": "loss", "content": 0.0018573726993054152, "timestamp": "2025-09-30 22:18:44.414767", "step": 4013, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:44.462993", "step": 4013, "epoch": 3 }, { "type": "loss", "content": 0.010334143415093422, "timestamp": "2025-09-30 22:18:44.470329", "step": 4014, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:44.514719", "step": 4014, "epoch": 3 }, { "type": "loss", "content": 0.009302409365773201, "timestamp": "2025-09-30 22:18:44.527262", "step": 4015, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:18:44.582526", "step": 4015, "epoch": 3 }, { "type": "loss", "content": 0.0037581834476441145, "timestamp": "2025-09-30 22:18:44.607587", "step": 4016, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:44.645433", "step": 4016, "epoch": 3 }, { "type": "loss", "content": 0.0035524829290807247, "timestamp": "2025-09-30 22:18:44.658402", "step": 4017, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:44.696112", "step": 4017, "epoch": 3 }, { "type": "loss", "content": 0.003935859072953463, "timestamp": "2025-09-30 22:18:44.706343", "step": 4018, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:44.744446", "step": 4018, "epoch": 3 }, { "type": "loss", "content": 0.005983736366033554, "timestamp": "2025-09-30 22:18:44.758242", "step": 4019, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:18:44.812181", "step": 4019, "epoch": 3 }, { "type": "loss", "content": 0.007891970686614513, "timestamp": "2025-09-30 22:18:44.848617", "step": 4020, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:44.894541", "step": 4020, "epoch": 3 }, { "type": "loss", "content": 0.008960836566984653, "timestamp": "2025-09-30 22:18:44.907616", "step": 4021, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:44.948956", "step": 4021, "epoch": 3 }, { "type": "loss", "content": 0.008770717307925224, "timestamp": "2025-09-30 22:18:44.962472", "step": 4022, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:45.006957", "step": 4022, "epoch": 3 }, { "type": "loss", "content": 0.006733729038387537, "timestamp": "2025-09-30 22:18:45.020930", "step": 4023, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:45.068308", "step": 4023, "epoch": 3 }, { "type": "loss", "content": 0.010043778456747532, "timestamp": "2025-09-30 22:18:45.102656", "step": 4024, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:45.153104", "step": 4024, "epoch": 3 }, { "type": "loss", "content": 0.011883224360644817, "timestamp": "2025-09-30 22:18:45.161692", "step": 4025, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:18:47.846177", "step": 4025, "epoch": 3 }, { "type": "pplx", "content": 5.809886067136617, "timestamp": "2025-09-30 22:18:47.851662", "step": 4025, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:47.894419", "step": 4025, "epoch": 3 }, { "type": "loss", "content": 0.004332016687840223, "timestamp": "2025-09-30 22:18:47.907791", "step": 4026, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:47.961457", "step": 4026, "epoch": 3 }, { "type": "loss", "content": 0.006139960139989853, "timestamp": "2025-09-30 22:18:47.968640", "step": 4027, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:48.015492", "step": 4027, "epoch": 3 }, { "type": "loss", "content": 0.003429949749261141, "timestamp": "2025-09-30 22:18:48.044411", "step": 4028, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:48.082663", "step": 4028, "epoch": 3 }, { "type": "loss", "content": 0.005339586641639471, "timestamp": "2025-09-30 22:18:48.100991", "step": 4029, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:48.136568", "step": 4029, "epoch": 3 }, { "type": "loss", "content": 0.0034270884934812784, "timestamp": "2025-09-30 22:18:48.147577", "step": 4030, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:48.186602", "step": 4030, "epoch": 3 }, { "type": "loss", "content": 0.002700925339013338, "timestamp": "2025-09-30 22:18:48.193394", "step": 4031, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:48.246419", "step": 4031, "epoch": 3 }, { "type": "loss", "content": 0.008455545641481876, "timestamp": "2025-09-30 22:18:48.277518", "step": 4032, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:48.323026", "step": 4032, "epoch": 3 }, { "type": "loss", "content": 0.010105387307703495, "timestamp": "2025-09-30 22:18:48.331881", "step": 4033, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:48.374076", "step": 4033, "epoch": 3 }, { "type": "loss", "content": 0.005560815799981356, "timestamp": "2025-09-30 22:18:48.382397", "step": 4034, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:48.435088", "step": 4034, "epoch": 3 }, { "type": "loss", "content": 0.009280094876885414, "timestamp": "2025-09-30 22:18:48.445050", "step": 4035, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:48.482514", "step": 4035, "epoch": 3 }, { "type": "loss", "content": 0.0043348814360797405, "timestamp": "2025-09-30 22:18:48.518953", "step": 4036, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:48.574409", "step": 4036, "epoch": 3 }, { "type": "loss", "content": 0.007800571154803038, "timestamp": "2025-09-30 22:18:48.587588", "step": 4037, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:48.641475", "step": 4037, "epoch": 3 }, { "type": "loss", "content": 0.006277676206082106, "timestamp": "2025-09-30 22:18:48.652652", "step": 4038, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:48.709752", "step": 4038, "epoch": 3 }, { "type": "loss", "content": 0.006815760396420956, "timestamp": "2025-09-30 22:18:48.720955", "step": 4039, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:48.770198", "step": 4039, "epoch": 3 }, { "type": "loss", "content": 0.003317673224955797, "timestamp": "2025-09-30 22:18:48.804525", "step": 4040, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:48.851356", "step": 4040, "epoch": 3 }, { "type": "loss", "content": 0.00768579775467515, "timestamp": "2025-09-30 22:18:48.863979", "step": 4041, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:48.900751", "step": 4041, "epoch": 3 }, { "type": "loss", "content": 0.004484781064093113, "timestamp": "2025-09-30 22:18:48.914537", "step": 4042, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:48.955563", "step": 4042, "epoch": 3 }, { "type": "loss", "content": 0.012383470311760902, "timestamp": "2025-09-30 22:18:48.969517", "step": 4043, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:49.018841", "step": 4043, "epoch": 3 }, { "type": "loss", "content": 0.007896116003394127, "timestamp": "2025-09-30 22:18:49.053135", "step": 4044, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:49.090178", "step": 4044, "epoch": 3 }, { "type": "loss", "content": 0.00294464617036283, "timestamp": "2025-09-30 22:18:49.100143", "step": 4045, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:49.146117", "step": 4045, "epoch": 3 }, { "type": "loss", "content": 0.0064725568518042564, "timestamp": "2025-09-30 22:18:49.158462", "step": 4046, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:49.193794", "step": 4046, "epoch": 3 }, { "type": "loss", "content": 0.005064612254500389, "timestamp": "2025-09-30 22:18:49.200856", "step": 4047, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:49.245051", "step": 4047, "epoch": 3 }, { "type": "loss", "content": 0.003626457182690501, "timestamp": "2025-09-30 22:18:49.281756", "step": 4048, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:49.341905", "step": 4048, "epoch": 3 }, { "type": "loss", "content": 0.006672897841781378, "timestamp": "2025-09-30 22:18:49.355017", "step": 4049, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:49.396912", "step": 4049, "epoch": 3 }, { "type": "loss", "content": 0.007566211279481649, "timestamp": "2025-09-30 22:18:49.404700", "step": 4050, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:49.457773", "step": 4050, "epoch": 3 }, { "type": "loss", "content": 0.009887561202049255, "timestamp": "2025-09-30 22:18:49.473638", "step": 4051, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:18:49.532939", "step": 4051, "epoch": 3 }, { "type": "loss", "content": 0.005307478364557028, "timestamp": "2025-09-30 22:18:49.571205", "step": 4052, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:49.619346", "step": 4052, "epoch": 3 }, { "type": "loss", "content": 0.009220928885042667, "timestamp": "2025-09-30 22:18:49.629165", "step": 4053, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:49.674453", "step": 4053, "epoch": 3 }, { "type": "loss", "content": 0.01152608823031187, "timestamp": "2025-09-30 22:18:49.686981", "step": 4054, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:49.729940", "step": 4054, "epoch": 3 }, { "type": "loss", "content": 0.0038574920035898685, "timestamp": "2025-09-30 22:18:49.740222", "step": 4055, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:49.788777", "step": 4055, "epoch": 3 }, { "type": "loss", "content": 0.010885195806622505, "timestamp": "2025-09-30 22:18:49.821854", "step": 4056, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:49.870966", "step": 4056, "epoch": 3 }, { "type": "loss", "content": 0.004181805998086929, "timestamp": "2025-09-30 22:18:49.879445", "step": 4057, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:49.932493", "step": 4057, "epoch": 3 }, { "type": "loss", "content": 0.004165918566286564, "timestamp": "2025-09-30 22:18:49.942936", "step": 4058, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:49.987852", "step": 4058, "epoch": 3 }, { "type": "loss", "content": 0.007592889945954084, "timestamp": "2025-09-30 22:18:49.996012", "step": 4059, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:50.045488", "step": 4059, "epoch": 3 }, { "type": "loss", "content": 0.009840689599514008, "timestamp": "2025-09-30 22:18:50.076659", "step": 4060, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:50.119002", "step": 4060, "epoch": 3 }, { "type": "loss", "content": 0.011874034069478512, "timestamp": "2025-09-30 22:18:50.128989", "step": 4061, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:50.173013", "step": 4061, "epoch": 3 }, { "type": "loss", "content": 0.0066198790445923805, "timestamp": "2025-09-30 22:18:50.185511", "step": 4062, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:50.240226", "step": 4062, "epoch": 3 }, { "type": "loss", "content": 0.009846655651926994, "timestamp": "2025-09-30 22:18:50.254050", "step": 4063, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:50.296765", "step": 4063, "epoch": 3 }, { "type": "loss", "content": 0.001079131499864161, "timestamp": "2025-09-30 22:18:50.326947", "step": 4064, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:50.362123", "step": 4064, "epoch": 3 }, { "type": "loss", "content": 0.00587060209363699, "timestamp": "2025-09-30 22:18:50.367558", "step": 4065, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:50.403145", "step": 4065, "epoch": 3 }, { "type": "loss", "content": 0.005940629635006189, "timestamp": "2025-09-30 22:18:50.415680", "step": 4066, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:50.458616", "step": 4066, "epoch": 3 }, { "type": "loss", "content": 0.0011660687159746885, "timestamp": "2025-09-30 22:18:50.466199", "step": 4067, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:50.506089", "step": 4067, "epoch": 3 }, { "type": "loss", "content": 0.002326065907254815, "timestamp": "2025-09-30 22:18:50.539234", "step": 4068, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:50.580698", "step": 4068, "epoch": 3 }, { "type": "loss", "content": 0.0008512693457305431, "timestamp": "2025-09-30 22:18:50.589271", "step": 4069, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:50.627165", "step": 4069, "epoch": 3 }, { "type": "loss", "content": 0.010879871435463428, "timestamp": "2025-09-30 22:18:50.639447", "step": 4070, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:50.690226", "step": 4070, "epoch": 3 }, { "type": "loss", "content": 0.00951231550425291, "timestamp": "2025-09-30 22:18:50.702775", "step": 4071, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:50.759843", "step": 4071, "epoch": 3 }, { "type": "loss", "content": 0.0026263566687703133, "timestamp": "2025-09-30 22:18:50.794136", "step": 4072, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:50.838345", "step": 4072, "epoch": 3 }, { "type": "loss", "content": 0.004461831878870726, "timestamp": "2025-09-30 22:18:50.846894", "step": 4073, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:50.889550", "step": 4073, "epoch": 3 }, { "type": "loss", "content": 0.008627448230981827, "timestamp": "2025-09-30 22:18:50.897676", "step": 4074, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:50.943787", "step": 4074, "epoch": 3 }, { "type": "loss", "content": 0.009395519271492958, "timestamp": "2025-09-30 22:18:50.957506", "step": 4075, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:50.997473", "step": 4075, "epoch": 3 }, { "type": "loss", "content": 0.0036263596266508102, "timestamp": "2025-09-30 22:18:51.025540", "step": 4076, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:51.058907", "step": 4076, "epoch": 3 }, { "type": "loss", "content": 0.01069303136318922, "timestamp": "2025-09-30 22:18:51.068753", "step": 4077, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:51.107921", "step": 4077, "epoch": 3 }, { "type": "loss", "content": 0.0033190203830599785, "timestamp": "2025-09-30 22:18:51.120189", "step": 4078, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:51.158668", "step": 4078, "epoch": 3 }, { "type": "loss", "content": 0.007404983509331942, "timestamp": "2025-09-30 22:18:51.168973", "step": 4079, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:51.209568", "step": 4079, "epoch": 3 }, { "type": "loss", "content": 0.002601143904030323, "timestamp": "2025-09-30 22:18:51.243010", "step": 4080, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:51.280124", "step": 4080, "epoch": 3 }, { "type": "loss", "content": 0.014199675992131233, "timestamp": "2025-09-30 22:18:51.292770", "step": 4081, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:51.334351", "step": 4081, "epoch": 3 }, { "type": "loss", "content": 0.004337809514254332, "timestamp": "2025-09-30 22:18:51.346928", "step": 4082, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:51.382342", "step": 4082, "epoch": 3 }, { "type": "loss", "content": 0.006910453084856272, "timestamp": "2025-09-30 22:18:51.390303", "step": 4083, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:51.435167", "step": 4083, "epoch": 3 }, { "type": "loss", "content": 0.0042189303785562515, "timestamp": "2025-09-30 22:18:51.469746", "step": 4084, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:51.509572", "step": 4084, "epoch": 3 }, { "type": "loss", "content": 0.006959874182939529, "timestamp": "2025-09-30 22:18:51.519550", "step": 4085, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:51.565429", "step": 4085, "epoch": 3 }, { "type": "loss", "content": 0.005139836110174656, "timestamp": "2025-09-30 22:18:51.578844", "step": 4086, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:51.628633", "step": 4086, "epoch": 3 }, { "type": "loss", "content": 0.0036931203212589025, "timestamp": "2025-09-30 22:18:51.642624", "step": 4087, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:51.695619", "step": 4087, "epoch": 3 }, { "type": "loss", "content": 0.005903789307922125, "timestamp": "2025-09-30 22:18:51.729035", "step": 4088, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:51.773651", "step": 4088, "epoch": 3 }, { "type": "loss", "content": 0.006845736410468817, "timestamp": "2025-09-30 22:18:51.786321", "step": 4089, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:51.834299", "step": 4089, "epoch": 3 }, { "type": "loss", "content": 0.006006104405969381, "timestamp": "2025-09-30 22:18:51.844709", "step": 4090, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:51.882781", "step": 4090, "epoch": 3 }, { "type": "loss", "content": 0.004835678264498711, "timestamp": "2025-09-30 22:18:51.890443", "step": 4091, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:51.931702", "step": 4091, "epoch": 3 }, { "type": "loss", "content": 0.008269528858363628, "timestamp": "2025-09-30 22:18:51.960794", "step": 4092, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:51.996031", "step": 4092, "epoch": 3 }, { "type": "loss", "content": 0.0042832400649785995, "timestamp": "2025-09-30 22:18:52.003606", "step": 4093, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:52.050170", "step": 4093, "epoch": 3 }, { "type": "loss", "content": 0.0063995858654379845, "timestamp": "2025-09-30 22:18:52.058243", "step": 4094, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:18:52.095785", "step": 4094, "epoch": 3 }, { "type": "loss", "content": 0.006296542473137379, "timestamp": "2025-09-30 22:18:52.100027", "step": 4095, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:18:52.138114", "step": 4095, "epoch": 3 }, { "type": "loss", "content": 0.004635804798454046, "timestamp": "2025-09-30 22:18:52.166234", "step": 4096, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:18:52.216739", "step": 4096, "epoch": 3 }, { "type": "loss", "content": 0.006758654490113258, "timestamp": "2025-09-30 22:18:52.234103", "step": 4097, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:52.273179", "step": 4097, "epoch": 3 }, { "type": "loss", "content": 0.00433450099080801, "timestamp": "2025-09-30 22:18:52.286924", "step": 4098, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:18:52.327342", "step": 4098, "epoch": 3 }, { "type": "loss", "content": 0.022350069135427475, "timestamp": "2025-09-30 22:18:52.335559", "step": 4099, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:52.375723", "step": 4099, "epoch": 3 }, { "type": "loss", "content": 0.006762483157217503, "timestamp": "2025-09-30 22:18:52.407495", "step": 4100, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:52.453283", "step": 4100, "epoch": 3 }, { "type": "loss", "content": 0.006077112630009651, "timestamp": "2025-09-30 22:18:52.459842", "step": 4101, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:52.508526", "step": 4101, "epoch": 3 }, { "type": "loss", "content": 0.009396334178745747, "timestamp": "2025-09-30 22:18:52.517162", "step": 4102, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-30 22:18:52.584744", "step": 4102, "epoch": 3 }, { "type": "loss", "content": 0.0033708529081195593, "timestamp": "2025-09-30 22:18:52.602388", "step": 4103, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:52.641561", "step": 4103, "epoch": 3 }, { "type": "loss", "content": 0.004656896460801363, "timestamp": "2025-09-30 22:18:52.670170", "step": 4104, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:52.705993", "step": 4104, "epoch": 3 }, { "type": "loss", "content": 0.009769827127456665, "timestamp": "2025-09-30 22:18:52.711671", "step": 4105, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:52.748407", "step": 4105, "epoch": 3 }, { "type": "loss", "content": 0.001809976645745337, "timestamp": "2025-09-30 22:18:52.756139", "step": 4106, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:52.802685", "step": 4106, "epoch": 3 }, { "type": "loss", "content": 0.005910452920943499, "timestamp": "2025-09-30 22:18:52.811137", "step": 4107, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:52.868617", "step": 4107, "epoch": 3 }, { "type": "loss", "content": 0.00518727907910943, "timestamp": "2025-09-30 22:18:52.903250", "step": 4108, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:52.944234", "step": 4108, "epoch": 3 }, { "type": "loss", "content": 0.007067784667015076, "timestamp": "2025-09-30 22:18:52.951454", "step": 4109, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:52.996175", "step": 4109, "epoch": 3 }, { "type": "loss", "content": 0.0049131568521261215, "timestamp": "2025-09-30 22:18:53.009855", "step": 4110, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:53.053646", "step": 4110, "epoch": 3 }, { "type": "loss", "content": 0.0052266800776124, "timestamp": "2025-09-30 22:18:53.067007", "step": 4111, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:53.127768", "step": 4111, "epoch": 3 }, { "type": "loss", "content": 0.004913975950330496, "timestamp": "2025-09-30 22:18:53.162347", "step": 4112, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:18:53.211080", "step": 4112, "epoch": 3 }, { "type": "loss", "content": 0.003879282856360078, "timestamp": "2025-09-30 22:18:53.227739", "step": 4113, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:53.268071", "step": 4113, "epoch": 3 }, { "type": "loss", "content": 0.004762736149132252, "timestamp": "2025-09-30 22:18:53.278458", "step": 4114, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:53.315117", "step": 4114, "epoch": 3 }, { "type": "loss", "content": 0.021380791440606117, "timestamp": "2025-09-30 22:18:53.322995", "step": 4115, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:53.379341", "step": 4115, "epoch": 3 }, { "type": "loss", "content": 0.0066805570386350155, "timestamp": "2025-09-30 22:18:53.411124", "step": 4116, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:53.447609", "step": 4116, "epoch": 3 }, { "type": "loss", "content": 0.006591428071260452, "timestamp": "2025-09-30 22:18:53.454704", "step": 4117, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:53.504531", "step": 4117, "epoch": 3 }, { "type": "loss", "content": 0.0032979913521558046, "timestamp": "2025-09-30 22:18:53.520418", "step": 4118, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:53.562101", "step": 4118, "epoch": 3 }, { "type": "loss", "content": 0.008285662159323692, "timestamp": "2025-09-30 22:18:53.576008", "step": 4119, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:53.614311", "step": 4119, "epoch": 3 }, { "type": "loss", "content": 0.005769718904048204, "timestamp": "2025-09-30 22:18:53.648824", "step": 4120, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:53.685187", "step": 4120, "epoch": 3 }, { "type": "loss", "content": 0.004447614308446646, "timestamp": "2025-09-30 22:18:53.693894", "step": 4121, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:18:53.746694", "step": 4121, "epoch": 3 }, { "type": "loss", "content": 0.0015994708519428968, "timestamp": "2025-09-30 22:18:53.754520", "step": 4122, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:53.791736", "step": 4122, "epoch": 3 }, { "type": "loss", "content": 0.0061613693833351135, "timestamp": "2025-09-30 22:18:53.804074", "step": 4123, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:53.844734", "step": 4123, "epoch": 3 }, { "type": "loss", "content": 0.00507601723074913, "timestamp": "2025-09-30 22:18:53.876537", "step": 4124, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:53.929647", "step": 4124, "epoch": 3 }, { "type": "loss", "content": 0.009173394180834293, "timestamp": "2025-09-30 22:18:53.942329", "step": 4125, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:53.987609", "step": 4125, "epoch": 3 }, { "type": "loss", "content": 0.0022419069427996874, "timestamp": "2025-09-30 22:18:53.995246", "step": 4126, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:54.037669", "step": 4126, "epoch": 3 }, { "type": "loss", "content": 0.008145290426909924, "timestamp": "2025-09-30 22:18:54.048734", "step": 4127, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:54.083897", "step": 4127, "epoch": 3 }, { "type": "loss", "content": 0.002505325712263584, "timestamp": "2025-09-30 22:18:54.116004", "step": 4128, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:54.154350", "step": 4128, "epoch": 3 }, { "type": "loss", "content": 0.006119324825704098, "timestamp": "2025-09-30 22:18:54.164970", "step": 4129, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:54.210351", "step": 4129, "epoch": 3 }, { "type": "loss", "content": 0.01269205380231142, "timestamp": "2025-09-30 22:18:54.218278", "step": 4130, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:54.266418", "step": 4130, "epoch": 3 }, { "type": "loss", "content": 0.009130466729402542, "timestamp": "2025-09-30 22:18:54.278943", "step": 4131, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:54.334100", "step": 4131, "epoch": 3 }, { "type": "loss", "content": 0.006617514882236719, "timestamp": "2025-09-30 22:18:54.368788", "step": 4132, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:54.414098", "step": 4132, "epoch": 3 }, { "type": "loss", "content": 0.0031109245028346777, "timestamp": "2025-09-30 22:18:54.427273", "step": 4133, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:54.473602", "step": 4133, "epoch": 3 }, { "type": "loss", "content": 0.008121561259031296, "timestamp": "2025-09-30 22:18:54.487406", "step": 4134, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:18:54.547693", "step": 4134, "epoch": 3 }, { "type": "loss", "content": 0.004598654806613922, "timestamp": "2025-09-30 22:18:54.563988", "step": 4135, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:54.601796", "step": 4135, "epoch": 3 }, { "type": "loss", "content": 0.0059106419794261456, "timestamp": "2025-09-30 22:18:54.635169", "step": 4136, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:54.678363", "step": 4136, "epoch": 3 }, { "type": "loss", "content": 0.0061576166190207005, "timestamp": "2025-09-30 22:18:54.691028", "step": 4137, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:54.741085", "step": 4137, "epoch": 3 }, { "type": "loss", "content": 0.0045078047551214695, "timestamp": "2025-09-30 22:18:54.756988", "step": 4138, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:54.799841", "step": 4138, "epoch": 3 }, { "type": "loss", "content": 0.007308666128665209, "timestamp": "2025-09-30 22:18:54.809892", "step": 4139, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:54.850422", "step": 4139, "epoch": 3 }, { "type": "loss", "content": 0.00295853428542614, "timestamp": "2025-09-30 22:18:54.882382", "step": 4140, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:18:57.607852", "step": 4140, "epoch": 3 }, { "type": "pplx", "content": 5.773015440842027, "timestamp": "2025-09-30 22:18:57.612067", "step": 4140, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:57.647540", "step": 4140, "epoch": 3 }, { "type": "loss", "content": 0.0040814983658492565, "timestamp": "2025-09-30 22:18:57.654240", "step": 4141, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:57.708759", "step": 4141, "epoch": 3 }, { "type": "loss", "content": 0.0023968368768692017, "timestamp": "2025-09-30 22:18:57.722124", "step": 4142, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:57.767755", "step": 4142, "epoch": 3 }, { "type": "loss", "content": 0.00340847484767437, "timestamp": "2025-09-30 22:18:57.781757", "step": 4143, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:57.819813", "step": 4143, "epoch": 3 }, { "type": "loss", "content": 0.005667718127369881, "timestamp": "2025-09-30 22:18:57.853249", "step": 4144, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:57.888506", "step": 4144, "epoch": 3 }, { "type": "loss", "content": 0.006874716840684414, "timestamp": "2025-09-30 22:18:57.898433", "step": 4145, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:57.936487", "step": 4145, "epoch": 3 }, { "type": "loss", "content": 0.010788613930344582, "timestamp": "2025-09-30 22:18:57.949062", "step": 4146, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:57.988173", "step": 4146, "epoch": 3 }, { "type": "loss", "content": 0.0027422248385846615, "timestamp": "2025-09-30 22:18:57.996094", "step": 4147, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:18:58.045290", "step": 4147, "epoch": 3 }, { "type": "loss", "content": 0.010014859959483147, "timestamp": "2025-09-30 22:18:58.081802", "step": 4148, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:18:58.124580", "step": 4148, "epoch": 3 }, { "type": "loss", "content": 0.0021662204526364803, "timestamp": "2025-09-30 22:18:58.139978", "step": 4149, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:18:58.185982", "step": 4149, "epoch": 3 }, { "type": "loss", "content": 0.006480331066995859, "timestamp": "2025-09-30 22:18:58.199815", "step": 4150, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:58.237927", "step": 4150, "epoch": 3 }, { "type": "loss", "content": 0.010116429068148136, "timestamp": "2025-09-30 22:18:58.250513", "step": 4151, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:18:58.306428", "step": 4151, "epoch": 3 }, { "type": "loss", "content": 0.0021906248293817043, "timestamp": "2025-09-30 22:18:58.341313", "step": 4152, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:18:58.379718", "step": 4152, "epoch": 3 }, { "type": "loss", "content": 0.0022437525913119316, "timestamp": "2025-09-30 22:18:58.388776", "step": 4153, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:58.433062", "step": 4153, "epoch": 3 }, { "type": "loss", "content": 0.0015901202568784356, "timestamp": "2025-09-30 22:18:58.446743", "step": 4154, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:58.485634", "step": 4154, "epoch": 3 }, { "type": "loss", "content": 0.0040994505397975445, "timestamp": "2025-09-30 22:18:58.493514", "step": 4155, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:58.540178", "step": 4155, "epoch": 3 }, { "type": "loss", "content": 0.005521212238818407, "timestamp": "2025-09-30 22:18:58.568655", "step": 4156, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:18:58.616000", "step": 4156, "epoch": 3 }, { "type": "loss", "content": 0.0036060779821127653, "timestamp": "2025-09-30 22:18:58.633096", "step": 4157, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:58.673057", "step": 4157, "epoch": 3 }, { "type": "loss", "content": 0.021071704104542732, "timestamp": "2025-09-30 22:18:58.681170", "step": 4158, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:58.721892", "step": 4158, "epoch": 3 }, { "type": "loss", "content": 0.00398464547470212, "timestamp": "2025-09-30 22:18:58.729544", "step": 4159, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:58.786858", "step": 4159, "epoch": 3 }, { "type": "loss", "content": 0.002623825566843152, "timestamp": "2025-09-30 22:18:58.818126", "step": 4160, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:58.853711", "step": 4160, "epoch": 3 }, { "type": "loss", "content": 0.007493019104003906, "timestamp": "2025-09-30 22:18:58.864438", "step": 4161, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:18:58.910465", "step": 4161, "epoch": 3 }, { "type": "loss", "content": 0.005534173455089331, "timestamp": "2025-09-30 22:18:58.926721", "step": 4162, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:58.973898", "step": 4162, "epoch": 3 }, { "type": "loss", "content": 0.0032436889596283436, "timestamp": "2025-09-30 22:18:58.986187", "step": 4163, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:59.024285", "step": 4163, "epoch": 3 }, { "type": "loss", "content": 0.007711523678153753, "timestamp": "2025-09-30 22:18:59.057473", "step": 4164, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:59.095498", "step": 4164, "epoch": 3 }, { "type": "loss", "content": 0.00906739104539156, "timestamp": "2025-09-30 22:18:59.103651", "step": 4165, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:59.157741", "step": 4165, "epoch": 3 }, { "type": "loss", "content": 0.00245729461312294, "timestamp": "2025-09-30 22:18:59.165689", "step": 4166, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:18:59.205803", "step": 4166, "epoch": 3 }, { "type": "loss", "content": 0.00661234138533473, "timestamp": "2025-09-30 22:18:59.217132", "step": 4167, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:18:59.259405", "step": 4167, "epoch": 3 }, { "type": "loss", "content": 0.0027672341093420982, "timestamp": "2025-09-30 22:18:59.292541", "step": 4168, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:59.331997", "step": 4168, "epoch": 3 }, { "type": "loss", "content": 0.00099122931715101, "timestamp": "2025-09-30 22:18:59.344611", "step": 4169, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:18:59.384233", "step": 4169, "epoch": 3 }, { "type": "loss", "content": 0.011586759239435196, "timestamp": "2025-09-30 22:18:59.396762", "step": 4170, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:59.435086", "step": 4170, "epoch": 3 }, { "type": "loss", "content": 0.003056830260902643, "timestamp": "2025-09-30 22:18:59.448748", "step": 4171, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:59.484176", "step": 4171, "epoch": 3 }, { "type": "loss", "content": 0.00391105841845274, "timestamp": "2025-09-30 22:18:59.516365", "step": 4172, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:59.563927", "step": 4172, "epoch": 3 }, { "type": "loss", "content": 0.001999323256313801, "timestamp": "2025-09-30 22:18:59.572685", "step": 4173, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:18:59.609068", "step": 4173, "epoch": 3 }, { "type": "loss", "content": 0.007335766684263945, "timestamp": "2025-09-30 22:18:59.620178", "step": 4174, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:59.657079", "step": 4174, "epoch": 3 }, { "type": "loss", "content": 0.004585715010762215, "timestamp": "2025-09-30 22:18:59.667494", "step": 4175, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:18:59.715849", "step": 4175, "epoch": 3 }, { "type": "loss", "content": 0.004428436979651451, "timestamp": "2025-09-30 22:18:59.752327", "step": 4176, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:18:59.813253", "step": 4176, "epoch": 3 }, { "type": "loss", "content": 0.006036048289388418, "timestamp": "2025-09-30 22:18:59.825849", "step": 4177, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:18:59.880058", "step": 4177, "epoch": 3 }, { "type": "loss", "content": 0.005262099672108889, "timestamp": "2025-09-30 22:18:59.893721", "step": 4178, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:18:59.945714", "step": 4178, "epoch": 3 }, { "type": "loss", "content": 0.004960318095982075, "timestamp": "2025-09-30 22:18:59.953669", "step": 4179, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:18:59.993644", "step": 4179, "epoch": 3 }, { "type": "loss", "content": 0.007269757799804211, "timestamp": "2025-09-30 22:19:00.027731", "step": 4180, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:00.063374", "step": 4180, "epoch": 3 }, { "type": "loss", "content": 0.005867009982466698, "timestamp": "2025-09-30 22:19:00.074190", "step": 4181, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:00.114517", "step": 4181, "epoch": 3 }, { "type": "loss", "content": 0.0028793776873499155, "timestamp": "2025-09-30 22:19:00.128224", "step": 4182, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:00.169338", "step": 4182, "epoch": 3 }, { "type": "loss", "content": 0.0024339838419109583, "timestamp": "2025-09-30 22:19:00.183029", "step": 4183, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:00.222213", "step": 4183, "epoch": 3 }, { "type": "loss", "content": 0.002243544440716505, "timestamp": "2025-09-30 22:19:00.256808", "step": 4184, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:00.302005", "step": 4184, "epoch": 3 }, { "type": "loss", "content": 0.005382244009524584, "timestamp": "2025-09-30 22:19:00.315169", "step": 4185, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:00.348820", "step": 4185, "epoch": 3 }, { "type": "loss", "content": 0.008612517267465591, "timestamp": "2025-09-30 22:19:00.361097", "step": 4186, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:00.396554", "step": 4186, "epoch": 3 }, { "type": "loss", "content": 0.006431583780795336, "timestamp": "2025-09-30 22:19:00.404542", "step": 4187, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:00.439345", "step": 4187, "epoch": 3 }, { "type": "loss", "content": 0.007725206669420004, "timestamp": "2025-09-30 22:19:00.472501", "step": 4188, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:00.508777", "step": 4188, "epoch": 3 }, { "type": "loss", "content": 0.00806194543838501, "timestamp": "2025-09-30 22:19:00.517157", "step": 4189, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:00.553602", "step": 4189, "epoch": 3 }, { "type": "loss", "content": 0.003619994968175888, "timestamp": "2025-09-30 22:19:00.561585", "step": 4190, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:00.617536", "step": 4190, "epoch": 3 }, { "type": "loss", "content": 0.001058433554135263, "timestamp": "2025-09-30 22:19:00.633122", "step": 4191, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:00.678045", "step": 4191, "epoch": 3 }, { "type": "loss", "content": 0.0038083905819803476, "timestamp": "2025-09-30 22:19:00.706896", "step": 4192, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:00.748299", "step": 4192, "epoch": 3 }, { "type": "loss", "content": 0.006782266311347485, "timestamp": "2025-09-30 22:19:00.760929", "step": 4193, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:00.800725", "step": 4193, "epoch": 3 }, { "type": "loss", "content": 0.008161071687936783, "timestamp": "2025-09-30 22:19:00.812015", "step": 4194, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:00.853572", "step": 4194, "epoch": 3 }, { "type": "loss", "content": 0.001998204505071044, "timestamp": "2025-09-30 22:19:00.867338", "step": 4195, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:00.923102", "step": 4195, "epoch": 3 }, { "type": "loss", "content": 0.001621428644284606, "timestamp": "2025-09-30 22:19:00.957340", "step": 4196, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:01.003286", "step": 4196, "epoch": 3 }, { "type": "loss", "content": 0.003493086202070117, "timestamp": "2025-09-30 22:19:01.013903", "step": 4197, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:01.049699", "step": 4197, "epoch": 3 }, { "type": "loss", "content": 0.001369775622151792, "timestamp": "2025-09-30 22:19:01.062016", "step": 4198, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:19:01.104786", "step": 4198, "epoch": 3 }, { "type": "loss", "content": 0.0035819520708173513, "timestamp": "2025-09-30 22:19:01.120693", "step": 4199, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:01.173809", "step": 4199, "epoch": 3 }, { "type": "loss", "content": 0.004347503650933504, "timestamp": "2025-09-30 22:19:01.208632", "step": 4200, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:01.249813", "step": 4200, "epoch": 3 }, { "type": "loss", "content": 0.0013405423378571868, "timestamp": "2025-09-30 22:19:01.257674", "step": 4201, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:01.317197", "step": 4201, "epoch": 3 }, { "type": "loss", "content": 0.00460792938247323, "timestamp": "2025-09-30 22:19:01.330924", "step": 4202, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:01.373942", "step": 4202, "epoch": 3 }, { "type": "loss", "content": 0.0026500532403588295, "timestamp": "2025-09-30 22:19:01.386073", "step": 4203, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:01.427312", "step": 4203, "epoch": 3 }, { "type": "loss", "content": 0.004506793338805437, "timestamp": "2025-09-30 22:19:01.459137", "step": 4204, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:01.503612", "step": 4204, "epoch": 3 }, { "type": "loss", "content": 0.009327051229774952, "timestamp": "2025-09-30 22:19:01.513661", "step": 4205, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:01.561340", "step": 4205, "epoch": 3 }, { "type": "loss", "content": 0.0073918732814490795, "timestamp": "2025-09-30 22:19:01.571678", "step": 4206, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:01.615606", "step": 4206, "epoch": 3 }, { "type": "loss", "content": 0.004413575399667025, "timestamp": "2025-09-30 22:19:01.626826", "step": 4207, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:01.664611", "step": 4207, "epoch": 3 }, { "type": "loss", "content": 0.003055840963497758, "timestamp": "2025-09-30 22:19:01.692846", "step": 4208, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:01.727747", "step": 4208, "epoch": 3 }, { "type": "loss", "content": 0.013104332610964775, "timestamp": "2025-09-30 22:19:01.733327", "step": 4209, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:01.782918", "step": 4209, "epoch": 3 }, { "type": "loss", "content": 0.011806270107626915, "timestamp": "2025-09-30 22:19:01.790159", "step": 4210, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:01.834407", "step": 4210, "epoch": 3 }, { "type": "loss", "content": 0.009919954463839531, "timestamp": "2025-09-30 22:19:01.848170", "step": 4211, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:01.886463", "step": 4211, "epoch": 3 }, { "type": "loss", "content": 0.00418841140344739, "timestamp": "2025-09-30 22:19:01.915289", "step": 4212, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:01.952394", "step": 4212, "epoch": 3 }, { "type": "loss", "content": 0.0029787688981741667, "timestamp": "2025-09-30 22:19:01.965705", "step": 4213, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:02.003590", "step": 4213, "epoch": 3 }, { "type": "loss", "content": 0.0026507768779993057, "timestamp": "2025-09-30 22:19:02.016995", "step": 4214, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:02.063406", "step": 4214, "epoch": 3 }, { "type": "loss", "content": 0.011196503415703773, "timestamp": "2025-09-30 22:19:02.071327", "step": 4215, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:02.109099", "step": 4215, "epoch": 3 }, { "type": "loss", "content": 0.011339684948325157, "timestamp": "2025-09-30 22:19:02.142230", "step": 4216, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:02.184627", "step": 4216, "epoch": 3 }, { "type": "loss", "content": 0.007355344947427511, "timestamp": "2025-09-30 22:19:02.197893", "step": 4217, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:02.242566", "step": 4217, "epoch": 3 }, { "type": "loss", "content": 0.0026141307316720486, "timestamp": "2025-09-30 22:19:02.255092", "step": 4218, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:02.296408", "step": 4218, "epoch": 3 }, { "type": "loss", "content": 0.0013344758190214634, "timestamp": "2025-09-30 22:19:02.310222", "step": 4219, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:02.345179", "step": 4219, "epoch": 3 }, { "type": "loss", "content": 0.004837046377360821, "timestamp": "2025-09-30 22:19:02.373388", "step": 4220, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:02.407432", "step": 4220, "epoch": 3 }, { "type": "loss", "content": 0.011121508665382862, "timestamp": "2025-09-30 22:19:02.412350", "step": 4221, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:02.446116", "step": 4221, "epoch": 3 }, { "type": "loss", "content": 0.017535170540213585, "timestamp": "2025-09-30 22:19:02.454025", "step": 4222, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:02.505142", "step": 4222, "epoch": 3 }, { "type": "loss", "content": 0.0030088103376328945, "timestamp": "2025-09-30 22:19:02.524032", "step": 4223, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:02.561067", "step": 4223, "epoch": 3 }, { "type": "loss", "content": 0.0013439098838716745, "timestamp": "2025-09-30 22:19:02.589197", "step": 4224, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:02.628464", "step": 4224, "epoch": 3 }, { "type": "loss", "content": 0.004040613770484924, "timestamp": "2025-09-30 22:19:02.634920", "step": 4225, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:02.678619", "step": 4225, "epoch": 3 }, { "type": "loss", "content": 0.0047653415240347385, "timestamp": "2025-09-30 22:19:02.686253", "step": 4226, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:02.723287", "step": 4226, "epoch": 3 }, { "type": "loss", "content": 0.005329497158527374, "timestamp": "2025-09-30 22:19:02.736630", "step": 4227, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:02.769458", "step": 4227, "epoch": 3 }, { "type": "loss", "content": 0.003973810467869043, "timestamp": "2025-09-30 22:19:02.798172", "step": 4228, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:02.835397", "step": 4228, "epoch": 3 }, { "type": "loss", "content": 0.003771683434024453, "timestamp": "2025-09-30 22:19:02.848529", "step": 4229, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:02.887001", "step": 4229, "epoch": 3 }, { "type": "loss", "content": 0.0023885814007371664, "timestamp": "2025-09-30 22:19:02.900736", "step": 4230, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:02.935781", "step": 4230, "epoch": 3 }, { "type": "loss", "content": 0.021918607875704765, "timestamp": "2025-09-30 22:19:02.949151", "step": 4231, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:02.985432", "step": 4231, "epoch": 3 }, { "type": "loss", "content": 0.004830060061067343, "timestamp": "2025-09-30 22:19:03.020148", "step": 4232, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:03.053543", "step": 4232, "epoch": 3 }, { "type": "loss", "content": 0.0036449062172323465, "timestamp": "2025-09-30 22:19:03.063585", "step": 4233, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:03.101175", "step": 4233, "epoch": 3 }, { "type": "loss", "content": 0.007518450729548931, "timestamp": "2025-09-30 22:19:03.112406", "step": 4234, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:03.148105", "step": 4234, "epoch": 3 }, { "type": "loss", "content": 0.004306568764150143, "timestamp": "2025-09-30 22:19:03.160699", "step": 4235, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:03.204916", "step": 4235, "epoch": 3 }, { "type": "loss", "content": 0.0038689924404025078, "timestamp": "2025-09-30 22:19:03.238385", "step": 4236, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:03.280175", "step": 4236, "epoch": 3 }, { "type": "loss", "content": 0.005623604636639357, "timestamp": "2025-09-30 22:19:03.288049", "step": 4237, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:03.327408", "step": 4237, "epoch": 3 }, { "type": "loss", "content": 0.005663564428687096, "timestamp": "2025-09-30 22:19:03.334697", "step": 4238, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:03.371457", "step": 4238, "epoch": 3 }, { "type": "loss", "content": 0.0026802935171872377, "timestamp": "2025-09-30 22:19:03.379276", "step": 4239, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:03.421167", "step": 4239, "epoch": 3 }, { "type": "loss", "content": 0.005176326725631952, "timestamp": "2025-09-30 22:19:03.452502", "step": 4240, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:03.488746", "step": 4240, "epoch": 3 }, { "type": "loss", "content": 0.009702826850116253, "timestamp": "2025-09-30 22:19:03.494403", "step": 4241, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:03.529508", "step": 4241, "epoch": 3 }, { "type": "loss", "content": 0.008449496701359749, "timestamp": "2025-09-30 22:19:03.536389", "step": 4242, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:03.576690", "step": 4242, "epoch": 3 }, { "type": "loss", "content": 0.006167837418615818, "timestamp": "2025-09-30 22:19:03.592324", "step": 4243, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:03.638147", "step": 4243, "epoch": 3 }, { "type": "loss", "content": 0.0028893337585031986, "timestamp": "2025-09-30 22:19:03.672817", "step": 4244, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:03.707762", "step": 4244, "epoch": 3 }, { "type": "loss", "content": 0.0020776870660483837, "timestamp": "2025-09-30 22:19:03.713056", "step": 4245, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:19:03.751860", "step": 4245, "epoch": 3 }, { "type": "loss", "content": 0.008614156395196915, "timestamp": "2025-09-30 22:19:03.756002", "step": 4246, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:03.791138", "step": 4246, "epoch": 3 }, { "type": "loss", "content": 0.0047997101210057735, "timestamp": "2025-09-30 22:19:03.798731", "step": 4247, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:03.837841", "step": 4247, "epoch": 3 }, { "type": "loss", "content": 0.006953793577849865, "timestamp": "2025-09-30 22:19:03.871258", "step": 4248, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:03.912452", "step": 4248, "epoch": 3 }, { "type": "loss", "content": 0.0016016876325011253, "timestamp": "2025-09-30 22:19:03.917656", "step": 4249, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:03.974172", "step": 4249, "epoch": 3 }, { "type": "loss", "content": 0.00614569429308176, "timestamp": "2025-09-30 22:19:03.986584", "step": 4250, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:04.031277", "step": 4250, "epoch": 3 }, { "type": "loss", "content": 0.0028142465744167566, "timestamp": "2025-09-30 22:19:04.039207", "step": 4251, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:04.080539", "step": 4251, "epoch": 3 }, { "type": "loss", "content": 0.005324391182512045, "timestamp": "2025-09-30 22:19:04.114159", "step": 4252, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:04.151741", "step": 4252, "epoch": 3 }, { "type": "loss", "content": 0.00806656014174223, "timestamp": "2025-09-30 22:19:04.157311", "step": 4253, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:04.226386", "step": 4253, "epoch": 3 }, { "type": "loss", "content": 0.005702858325093985, "timestamp": "2025-09-30 22:19:04.236901", "step": 4254, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:04.277929", "step": 4254, "epoch": 3 }, { "type": "loss", "content": 0.006622773595154285, "timestamp": "2025-09-30 22:19:04.291643", "step": 4255, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:19:06.850289", "step": 4255, "epoch": 3 }, { "type": "pplx", "content": 5.953026891361323, "timestamp": "2025-09-30 22:19:06.853599", "step": 4255, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-30 22:19:06.897051", "step": 4255, "epoch": 3 }, { "type": "loss", "content": 0.0011021263198927045, "timestamp": "2025-09-30 22:19:06.927939", "step": 4256, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:06.964283", "step": 4256, "epoch": 3 }, { "type": "loss", "content": 0.0023401672951877117, "timestamp": "2025-09-30 22:19:06.972164", "step": 4257, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:07.016186", "step": 4257, "epoch": 3 }, { "type": "loss", "content": 0.01058325543999672, "timestamp": "2025-09-30 22:19:07.023509", "step": 4258, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:07.066173", "step": 4258, "epoch": 3 }, { "type": "loss", "content": 0.0055806296877563, "timestamp": "2025-09-30 22:19:07.079748", "step": 4259, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:07.121618", "step": 4259, "epoch": 3 }, { "type": "loss", "content": 0.0015534240519627929, "timestamp": "2025-09-30 22:19:07.156187", "step": 4260, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:07.196271", "step": 4260, "epoch": 3 }, { "type": "loss", "content": 0.0026457849889993668, "timestamp": "2025-09-30 22:19:07.208928", "step": 4261, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:07.253446", "step": 4261, "epoch": 3 }, { "type": "loss", "content": 0.0017473185434937477, "timestamp": "2025-09-30 22:19:07.261431", "step": 4262, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:07.294895", "step": 4262, "epoch": 3 }, { "type": "loss", "content": 0.0018314578337594867, "timestamp": "2025-09-30 22:19:07.305228", "step": 4263, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:07.346132", "step": 4263, "epoch": 3 }, { "type": "loss", "content": 0.00169796880800277, "timestamp": "2025-09-30 22:19:07.378009", "step": 4264, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:19:07.422956", "step": 4264, "epoch": 3 }, { "type": "loss", "content": 0.00701497495174408, "timestamp": "2025-09-30 22:19:07.438789", "step": 4265, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:07.472061", "step": 4265, "epoch": 3 }, { "type": "loss", "content": 0.008701121434569359, "timestamp": "2025-09-30 22:19:07.483172", "step": 4266, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:07.518054", "step": 4266, "epoch": 3 }, { "type": "loss", "content": 0.002603327389806509, "timestamp": "2025-09-30 22:19:07.530208", "step": 4267, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:07.564482", "step": 4267, "epoch": 3 }, { "type": "loss", "content": 0.003772999159991741, "timestamp": "2025-09-30 22:19:07.596672", "step": 4268, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 576 ], "flops": 17085996872448 }, "timestamp": "2025-09-30 22:19:07.655042", "step": 4268, "epoch": 3 }, { "type": "loss", "content": 0.0037890970706939697, "timestamp": "2025-09-30 22:19:07.674316", "step": 4269, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:07.719110", "step": 4269, "epoch": 3 }, { "type": "loss", "content": 0.004082482308149338, "timestamp": "2025-09-30 22:19:07.733023", "step": 4270, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:07.768301", "step": 4270, "epoch": 3 }, { "type": "loss", "content": 0.009964211843907833, "timestamp": "2025-09-30 22:19:07.778591", "step": 4271, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:07.813755", "step": 4271, "epoch": 3 }, { "type": "loss", "content": 0.007102191913872957, "timestamp": "2025-09-30 22:19:07.845005", "step": 4272, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:07.879765", "step": 4272, "epoch": 3 }, { "type": "loss", "content": 0.00931872334331274, "timestamp": "2025-09-30 22:19:07.890305", "step": 4273, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:07.930522", "step": 4273, "epoch": 3 }, { "type": "loss", "content": 0.0022495731245726347, "timestamp": "2025-09-30 22:19:07.944446", "step": 4274, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:07.985567", "step": 4274, "epoch": 3 }, { "type": "loss", "content": 0.004006609320640564, "timestamp": "2025-09-30 22:19:07.999283", "step": 4275, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:08.035564", "step": 4275, "epoch": 3 }, { "type": "loss", "content": 0.008295389823615551, "timestamp": "2025-09-30 22:19:08.063693", "step": 4276, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:08.099774", "step": 4276, "epoch": 3 }, { "type": "loss", "content": 0.0014035895001143217, "timestamp": "2025-09-30 22:19:08.108223", "step": 4277, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:08.150002", "step": 4277, "epoch": 3 }, { "type": "loss", "content": 0.0077087851241230965, "timestamp": "2025-09-30 22:19:08.161070", "step": 4278, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:08.203994", "step": 4278, "epoch": 3 }, { "type": "loss", "content": 0.002847875002771616, "timestamp": "2025-09-30 22:19:08.215147", "step": 4279, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:08.251323", "step": 4279, "epoch": 3 }, { "type": "loss", "content": 0.004052981734275818, "timestamp": "2025-09-30 22:19:08.282513", "step": 4280, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:19:08.325199", "step": 4280, "epoch": 3 }, { "type": "loss", "content": 0.0036239621695131063, "timestamp": "2025-09-30 22:19:08.341919", "step": 4281, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:08.378924", "step": 4281, "epoch": 3 }, { "type": "loss", "content": 0.00455652317032218, "timestamp": "2025-09-30 22:19:08.385965", "step": 4282, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:08.426331", "step": 4282, "epoch": 3 }, { "type": "loss", "content": 0.007087154779583216, "timestamp": "2025-09-30 22:19:08.436573", "step": 4283, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:08.492487", "step": 4283, "epoch": 3 }, { "type": "loss", "content": 0.008435919880867004, "timestamp": "2025-09-30 22:19:08.521313", "step": 4284, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:08.564592", "step": 4284, "epoch": 3 }, { "type": "loss", "content": 0.0013751048827543855, "timestamp": "2025-09-30 22:19:08.570189", "step": 4285, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:08.628834", "step": 4285, "epoch": 3 }, { "type": "loss", "content": 0.010922163724899292, "timestamp": "2025-09-30 22:19:08.639694", "step": 4286, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:08.695950", "step": 4286, "epoch": 3 }, { "type": "loss", "content": 0.002734346082434058, "timestamp": "2025-09-30 22:19:08.709278", "step": 4287, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:08.748917", "step": 4287, "epoch": 3 }, { "type": "loss", "content": 0.01130365114659071, "timestamp": "2025-09-30 22:19:08.781022", "step": 4288, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:08.825831", "step": 4288, "epoch": 3 }, { "type": "loss", "content": 0.014183576218783855, "timestamp": "2025-09-30 22:19:08.835895", "step": 4289, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:08.880131", "step": 4289, "epoch": 3 }, { "type": "loss", "content": 0.006344980094581842, "timestamp": "2025-09-30 22:19:08.887420", "step": 4290, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:08.925273", "step": 4290, "epoch": 3 }, { "type": "loss", "content": 0.004288821946829557, "timestamp": "2025-09-30 22:19:08.932865", "step": 4291, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:08.966631", "step": 4291, "epoch": 3 }, { "type": "loss", "content": 0.00489983893930912, "timestamp": "2025-09-30 22:19:08.998653", "step": 4292, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:09.044869", "step": 4292, "epoch": 3 }, { "type": "loss", "content": 0.005966820288449526, "timestamp": "2025-09-30 22:19:09.050213", "step": 4293, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:09.089173", "step": 4293, "epoch": 3 }, { "type": "loss", "content": 0.0028437243308871984, "timestamp": "2025-09-30 22:19:09.099550", "step": 4294, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:09.138196", "step": 4294, "epoch": 3 }, { "type": "loss", "content": 0.006014253944158554, "timestamp": "2025-09-30 22:19:09.159570", "step": 4295, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:09.196223", "step": 4295, "epoch": 3 }, { "type": "loss", "content": 0.00552031584084034, "timestamp": "2025-09-30 22:19:09.224110", "step": 4296, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:09.261283", "step": 4296, "epoch": 3 }, { "type": "loss", "content": 0.006494057364761829, "timestamp": "2025-09-30 22:19:09.269886", "step": 4297, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:09.304583", "step": 4297, "epoch": 3 }, { "type": "loss", "content": 0.006987850181758404, "timestamp": "2025-09-30 22:19:09.315060", "step": 4298, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:09.351181", "step": 4298, "epoch": 3 }, { "type": "loss", "content": 0.0029523270204663277, "timestamp": "2025-09-30 22:19:09.362311", "step": 4299, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:09.408582", "step": 4299, "epoch": 3 }, { "type": "loss", "content": 0.010710208676755428, "timestamp": "2025-09-30 22:19:09.440314", "step": 4300, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:09.474381", "step": 4300, "epoch": 3 }, { "type": "loss", "content": 0.0016859716270118952, "timestamp": "2025-09-30 22:19:09.485510", "step": 4301, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:09.521687", "step": 4301, "epoch": 3 }, { "type": "loss", "content": 0.005892353132367134, "timestamp": "2025-09-30 22:19:09.532745", "step": 4302, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:09.566787", "step": 4302, "epoch": 3 }, { "type": "loss", "content": 0.0008297090535052121, "timestamp": "2025-09-30 22:19:09.577986", "step": 4303, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:09.625530", "step": 4303, "epoch": 3 }, { "type": "loss", "content": 0.007029360625892878, "timestamp": "2025-09-30 22:19:09.658953", "step": 4304, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:09.691460", "step": 4304, "epoch": 3 }, { "type": "loss", "content": 0.0020163406152278185, "timestamp": "2025-09-30 22:19:09.697056", "step": 4305, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:09.730328", "step": 4305, "epoch": 3 }, { "type": "loss", "content": 0.008451982401311398, "timestamp": "2025-09-30 22:19:09.740647", "step": 4306, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:09.776823", "step": 4306, "epoch": 3 }, { "type": "loss", "content": 0.0029738156590610743, "timestamp": "2025-09-30 22:19:09.788180", "step": 4307, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:09.829871", "step": 4307, "epoch": 3 }, { "type": "loss", "content": 0.004055047873407602, "timestamp": "2025-09-30 22:19:09.858695", "step": 4308, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:09.903606", "step": 4308, "epoch": 3 }, { "type": "loss", "content": 0.004126362968236208, "timestamp": "2025-09-30 22:19:09.916608", "step": 4309, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:19:09.983910", "step": 4309, "epoch": 3 }, { "type": "loss", "content": 0.009861967526376247, "timestamp": "2025-09-30 22:19:10.001168", "step": 4310, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:10.041811", "step": 4310, "epoch": 3 }, { "type": "loss", "content": 0.006570646073669195, "timestamp": "2025-09-30 22:19:10.055837", "step": 4311, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:10.098272", "step": 4311, "epoch": 3 }, { "type": "loss", "content": 0.008274688385426998, "timestamp": "2025-09-30 22:19:10.132784", "step": 4312, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:10.170119", "step": 4312, "epoch": 3 }, { "type": "loss", "content": 0.006628716830164194, "timestamp": "2025-09-30 22:19:10.175276", "step": 4313, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:10.221181", "step": 4313, "epoch": 3 }, { "type": "loss", "content": 0.0055550276301801205, "timestamp": "2025-09-30 22:19:10.234806", "step": 4314, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:19:10.280874", "step": 4314, "epoch": 3 }, { "type": "loss", "content": 0.002743455581367016, "timestamp": "2025-09-30 22:19:10.297930", "step": 4315, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:10.339639", "step": 4315, "epoch": 3 }, { "type": "loss", "content": 0.009410511702299118, "timestamp": "2025-09-30 22:19:10.374156", "step": 4316, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:10.420579", "step": 4316, "epoch": 3 }, { "type": "loss", "content": 0.003582603530958295, "timestamp": "2025-09-30 22:19:10.433919", "step": 4317, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:19:10.476659", "step": 4317, "epoch": 3 }, { "type": "loss", "content": 0.0032016027253121138, "timestamp": "2025-09-30 22:19:10.493041", "step": 4318, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:19:10.545342", "step": 4318, "epoch": 3 }, { "type": "loss", "content": 0.0011657862924039364, "timestamp": "2025-09-30 22:19:10.562565", "step": 4319, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:10.606312", "step": 4319, "epoch": 3 }, { "type": "loss", "content": 0.009448532946407795, "timestamp": "2025-09-30 22:19:10.641172", "step": 4320, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:19:10.688526", "step": 4320, "epoch": 3 }, { "type": "loss", "content": 0.0023872260935604572, "timestamp": "2025-09-30 22:19:10.704456", "step": 4321, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:10.757888", "step": 4321, "epoch": 3 }, { "type": "loss", "content": 0.01825058087706566, "timestamp": "2025-09-30 22:19:10.765562", "step": 4322, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:10.798220", "step": 4322, "epoch": 3 }, { "type": "loss", "content": 0.0006977806915529072, "timestamp": "2025-09-30 22:19:10.805991", "step": 4323, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:10.844655", "step": 4323, "epoch": 3 }, { "type": "loss", "content": 0.007597570773214102, "timestamp": "2025-09-30 22:19:10.877710", "step": 4324, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:10.915272", "step": 4324, "epoch": 3 }, { "type": "loss", "content": 0.005277728196233511, "timestamp": "2025-09-30 22:19:10.925181", "step": 4325, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:10.972285", "step": 4325, "epoch": 3 }, { "type": "loss", "content": 0.0012779583921656013, "timestamp": "2025-09-30 22:19:10.984616", "step": 4326, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:11.031885", "step": 4326, "epoch": 3 }, { "type": "loss", "content": 0.006616917438805103, "timestamp": "2025-09-30 22:19:11.044502", "step": 4327, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:11.080175", "step": 4327, "epoch": 3 }, { "type": "loss", "content": 0.002342329826205969, "timestamp": "2025-09-30 22:19:11.111461", "step": 4328, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:11.150659", "step": 4328, "epoch": 3 }, { "type": "loss", "content": 0.002416786039248109, "timestamp": "2025-09-30 22:19:11.163372", "step": 4329, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:11.210888", "step": 4329, "epoch": 3 }, { "type": "loss", "content": 0.0071250577457249165, "timestamp": "2025-09-30 22:19:11.224268", "step": 4330, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:11.261897", "step": 4330, "epoch": 3 }, { "type": "loss", "content": 0.0012249717256054282, "timestamp": "2025-09-30 22:19:11.269625", "step": 4331, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:11.309379", "step": 4331, "epoch": 3 }, { "type": "loss", "content": 0.004613219760358334, "timestamp": "2025-09-30 22:19:11.337194", "step": 4332, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:11.378826", "step": 4332, "epoch": 3 }, { "type": "loss", "content": 0.007091932464390993, "timestamp": "2025-09-30 22:19:11.387433", "step": 4333, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:11.430415", "step": 4333, "epoch": 3 }, { "type": "loss", "content": 0.0019520223140716553, "timestamp": "2025-09-30 22:19:11.442713", "step": 4334, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:11.489068", "step": 4334, "epoch": 3 }, { "type": "loss", "content": 0.0044762929901480675, "timestamp": "2025-09-30 22:19:11.496363", "step": 4335, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:11.530196", "step": 4335, "epoch": 3 }, { "type": "loss", "content": 0.0015030631329864264, "timestamp": "2025-09-30 22:19:11.563358", "step": 4336, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:11.603094", "step": 4336, "epoch": 3 }, { "type": "loss", "content": 0.005056925117969513, "timestamp": "2025-09-30 22:19:11.607958", "step": 4337, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:11.647290", "step": 4337, "epoch": 3 }, { "type": "loss", "content": 0.011781950481235981, "timestamp": "2025-09-30 22:19:11.657607", "step": 4338, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:11.690406", "step": 4338, "epoch": 3 }, { "type": "loss", "content": 0.0026247689966112375, "timestamp": "2025-09-30 22:19:11.697313", "step": 4339, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:11.750057", "step": 4339, "epoch": 3 }, { "type": "loss", "content": 0.003992740530520678, "timestamp": "2025-09-30 22:19:11.781343", "step": 4340, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:11.825837", "step": 4340, "epoch": 3 }, { "type": "loss", "content": 0.0036123378667980433, "timestamp": "2025-09-30 22:19:11.833783", "step": 4341, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:11.874985", "step": 4341, "epoch": 3 }, { "type": "loss", "content": 0.010466721840202808, "timestamp": "2025-09-30 22:19:11.887459", "step": 4342, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:11.924533", "step": 4342, "epoch": 3 }, { "type": "loss", "content": 0.004870318807661533, "timestamp": "2025-09-30 22:19:11.934804", "step": 4343, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:11.974276", "step": 4343, "epoch": 3 }, { "type": "loss", "content": 0.0008079107501544058, "timestamp": "2025-09-30 22:19:12.002988", "step": 4344, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:12.043087", "step": 4344, "epoch": 3 }, { "type": "loss", "content": 0.0037499789614230394, "timestamp": "2025-09-30 22:19:12.048338", "step": 4345, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:12.087573", "step": 4345, "epoch": 3 }, { "type": "loss", "content": 0.0018727704882621765, "timestamp": "2025-09-30 22:19:12.094474", "step": 4346, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:12.136157", "step": 4346, "epoch": 3 }, { "type": "loss", "content": 0.004115007352083921, "timestamp": "2025-09-30 22:19:12.144158", "step": 4347, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:12.178609", "step": 4347, "epoch": 3 }, { "type": "loss", "content": 0.0139734772965312, "timestamp": "2025-09-30 22:19:12.207451", "step": 4348, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:12.244608", "step": 4348, "epoch": 3 }, { "type": "loss", "content": 0.003832270158454776, "timestamp": "2025-09-30 22:19:12.252692", "step": 4349, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:12.292601", "step": 4349, "epoch": 3 }, { "type": "loss", "content": 0.004294030833989382, "timestamp": "2025-09-30 22:19:12.305194", "step": 4350, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:12.349346", "step": 4350, "epoch": 3 }, { "type": "loss", "content": 0.0010019203182309866, "timestamp": "2025-09-30 22:19:12.353960", "step": 4351, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:12.395269", "step": 4351, "epoch": 3 }, { "type": "loss", "content": 0.0012013876112177968, "timestamp": "2025-09-30 22:19:12.423199", "step": 4352, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:12.463687", "step": 4352, "epoch": 3 }, { "type": "loss", "content": 0.005679008085280657, "timestamp": "2025-09-30 22:19:12.473487", "step": 4353, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:12.506292", "step": 4353, "epoch": 3 }, { "type": "loss", "content": 0.0026292474940419197, "timestamp": "2025-09-30 22:19:12.516766", "step": 4354, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:12.553864", "step": 4354, "epoch": 3 }, { "type": "loss", "content": 0.0029538250528275967, "timestamp": "2025-09-30 22:19:12.567252", "step": 4355, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:12.604588", "step": 4355, "epoch": 3 }, { "type": "loss", "content": 0.0026939180679619312, "timestamp": "2025-09-30 22:19:12.633365", "step": 4356, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:12.668085", "step": 4356, "epoch": 3 }, { "type": "loss", "content": 0.003916259855031967, "timestamp": "2025-09-30 22:19:12.676122", "step": 4357, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:19:12.726197", "step": 4357, "epoch": 3 }, { "type": "loss", "content": 0.005390469450503588, "timestamp": "2025-09-30 22:19:12.742088", "step": 4358, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:12.784714", "step": 4358, "epoch": 3 }, { "type": "loss", "content": 0.0019631667528301477, "timestamp": "2025-09-30 22:19:12.798065", "step": 4359, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:12.842955", "step": 4359, "epoch": 3 }, { "type": "loss", "content": 0.0065919035114347935, "timestamp": "2025-09-30 22:19:12.877491", "step": 4360, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:12.914866", "step": 4360, "epoch": 3 }, { "type": "loss", "content": 0.0034850030206143856, "timestamp": "2025-09-30 22:19:12.927975", "step": 4361, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:12.966951", "step": 4361, "epoch": 3 }, { "type": "loss", "content": 0.0016640233807265759, "timestamp": "2025-09-30 22:19:12.979305", "step": 4362, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:13.023953", "step": 4362, "epoch": 3 }, { "type": "loss", "content": 0.00251871719956398, "timestamp": "2025-09-30 22:19:13.032013", "step": 4363, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:13.075520", "step": 4363, "epoch": 3 }, { "type": "loss", "content": 0.0030696927569806576, "timestamp": "2025-09-30 22:19:13.110037", "step": 4364, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:13.153783", "step": 4364, "epoch": 3 }, { "type": "loss", "content": 0.00622408976778388, "timestamp": "2025-09-30 22:19:13.166446", "step": 4365, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:13.211109", "step": 4365, "epoch": 3 }, { "type": "loss", "content": 0.003771674120798707, "timestamp": "2025-09-30 22:19:13.224800", "step": 4366, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:13.258888", "step": 4366, "epoch": 3 }, { "type": "loss", "content": 0.0017379183555021882, "timestamp": "2025-09-30 22:19:13.265803", "step": 4367, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:13.299777", "step": 4367, "epoch": 3 }, { "type": "loss", "content": 0.0018934322288259864, "timestamp": "2025-09-30 22:19:13.328256", "step": 4368, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:13.375821", "step": 4368, "epoch": 3 }, { "type": "loss", "content": 0.006382144056260586, "timestamp": "2025-09-30 22:19:13.390937", "step": 4369, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:13.439713", "step": 4369, "epoch": 3 }, { "type": "loss", "content": 0.0010274213273078203, "timestamp": "2025-09-30 22:19:13.450159", "step": 4370, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:19:16.081076", "step": 4370, "epoch": 3 }, { "type": "pplx", "content": 5.953068817951526, "timestamp": "2025-09-30 22:19:16.085603", "step": 4370, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:16.121030", "step": 4370, "epoch": 3 }, { "type": "loss", "content": 0.008330886252224445, "timestamp": "2025-09-30 22:19:16.127589", "step": 4371, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:16.167944", "step": 4371, "epoch": 3 }, { "type": "loss", "content": 0.005211700685322285, "timestamp": "2025-09-30 22:19:16.199921", "step": 4372, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:16.237190", "step": 4372, "epoch": 3 }, { "type": "loss", "content": 0.004459177143871784, "timestamp": "2025-09-30 22:19:16.242790", "step": 4373, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:16.277880", "step": 4373, "epoch": 3 }, { "type": "loss", "content": 0.010086101479828358, "timestamp": "2025-09-30 22:19:16.290247", "step": 4374, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:16.327193", "step": 4374, "epoch": 3 }, { "type": "loss", "content": 0.0048518106341362, "timestamp": "2025-09-30 22:19:16.334762", "step": 4375, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:16.371764", "step": 4375, "epoch": 3 }, { "type": "loss", "content": 0.004126888699829578, "timestamp": "2025-09-30 22:19:16.405238", "step": 4376, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:16.442434", "step": 4376, "epoch": 3 }, { "type": "loss", "content": 0.002302473410964012, "timestamp": "2025-09-30 22:19:16.455410", "step": 4377, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:16.502713", "step": 4377, "epoch": 3 }, { "type": "loss", "content": 0.0057604811154305935, "timestamp": "2025-09-30 22:19:16.513076", "step": 4378, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:16.547407", "step": 4378, "epoch": 3 }, { "type": "loss", "content": 0.004845494404435158, "timestamp": "2025-09-30 22:19:16.557846", "step": 4379, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:16.593196", "step": 4379, "epoch": 3 }, { "type": "loss", "content": 0.0019276568200439215, "timestamp": "2025-09-30 22:19:16.625128", "step": 4380, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:16.668052", "step": 4380, "epoch": 3 }, { "type": "loss", "content": 0.002704145386815071, "timestamp": "2025-09-30 22:19:16.676120", "step": 4381, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:16.711014", "step": 4381, "epoch": 3 }, { "type": "loss", "content": 0.004898314829915762, "timestamp": "2025-09-30 22:19:16.718217", "step": 4382, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:16.759335", "step": 4382, "epoch": 3 }, { "type": "loss", "content": 0.0038968524895608425, "timestamp": "2025-09-30 22:19:16.770405", "step": 4383, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:19:16.814699", "step": 4383, "epoch": 3 }, { "type": "loss", "content": 0.0018282084492966533, "timestamp": "2025-09-30 22:19:16.851645", "step": 4384, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:16.888730", "step": 4384, "epoch": 3 }, { "type": "loss", "content": 0.0008353728335350752, "timestamp": "2025-09-30 22:19:16.891394", "step": 4385, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:16.925332", "step": 4385, "epoch": 3 }, { "type": "loss", "content": 0.0059968712739646435, "timestamp": "2025-09-30 22:19:16.932517", "step": 4386, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:16.969629", "step": 4386, "epoch": 3 }, { "type": "loss", "content": 0.008428453467786312, "timestamp": "2025-09-30 22:19:16.977452", "step": 4387, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:17.013635", "step": 4387, "epoch": 3 }, { "type": "loss", "content": 0.0045945607125759125, "timestamp": "2025-09-30 22:19:17.045716", "step": 4388, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:17.080013", "step": 4388, "epoch": 3 }, { "type": "loss", "content": 0.004945170134305954, "timestamp": "2025-09-30 22:19:17.085582", "step": 4389, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:17.131888", "step": 4389, "epoch": 3 }, { "type": "loss", "content": 0.006509590428322554, "timestamp": "2025-09-30 22:19:17.142911", "step": 4390, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:17.176529", "step": 4390, "epoch": 3 }, { "type": "loss", "content": 0.0025962223298847675, "timestamp": "2025-09-30 22:19:17.188775", "step": 4391, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:17.238274", "step": 4391, "epoch": 3 }, { "type": "loss", "content": 0.0030034177470952272, "timestamp": "2025-09-30 22:19:17.266241", "step": 4392, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:17.304115", "step": 4392, "epoch": 3 }, { "type": "loss", "content": 0.007417632266879082, "timestamp": "2025-09-30 22:19:17.312720", "step": 4393, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:17.358520", "step": 4393, "epoch": 3 }, { "type": "loss", "content": 0.0008212727261707187, "timestamp": "2025-09-30 22:19:17.365434", "step": 4394, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:17.401767", "step": 4394, "epoch": 3 }, { "type": "loss", "content": 0.004093260038644075, "timestamp": "2025-09-30 22:19:17.408354", "step": 4395, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:17.451740", "step": 4395, "epoch": 3 }, { "type": "loss", "content": 0.007276744581758976, "timestamp": "2025-09-30 22:19:17.480296", "step": 4396, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:17.516802", "step": 4396, "epoch": 3 }, { "type": "loss", "content": 0.001587934442795813, "timestamp": "2025-09-30 22:19:17.522876", "step": 4397, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:17.560879", "step": 4397, "epoch": 3 }, { "type": "loss", "content": 0.0006083215703256428, "timestamp": "2025-09-30 22:19:17.567864", "step": 4398, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:17.602069", "step": 4398, "epoch": 3 }, { "type": "loss", "content": 0.0009365178411826491, "timestamp": "2025-09-30 22:19:17.609715", "step": 4399, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:17.660608", "step": 4399, "epoch": 3 }, { "type": "loss", "content": 0.0034567248076200485, "timestamp": "2025-09-30 22:19:17.691842", "step": 4400, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:17.734310", "step": 4400, "epoch": 3 }, { "type": "loss", "content": 0.004850293975323439, "timestamp": "2025-09-30 22:19:17.741172", "step": 4401, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:17.776883", "step": 4401, "epoch": 3 }, { "type": "loss", "content": 0.01048145443201065, "timestamp": "2025-09-30 22:19:17.787274", "step": 4402, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:19:17.826689", "step": 4402, "epoch": 3 }, { "type": "loss", "content": 0.001172828022390604, "timestamp": "2025-09-30 22:19:17.830842", "step": 4403, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:17.868167", "step": 4403, "epoch": 3 }, { "type": "loss", "content": 0.0014994451776146889, "timestamp": "2025-09-30 22:19:17.901819", "step": 4404, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:17.945820", "step": 4404, "epoch": 3 }, { "type": "loss", "content": 0.0007781424792483449, "timestamp": "2025-09-30 22:19:17.950475", "step": 4405, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:17.993472", "step": 4405, "epoch": 3 }, { "type": "loss", "content": 0.018517231568694115, "timestamp": "2025-09-30 22:19:18.000423", "step": 4406, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:18.035649", "step": 4406, "epoch": 3 }, { "type": "loss", "content": 0.004184328485280275, "timestamp": "2025-09-30 22:19:18.042917", "step": 4407, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:18.090234", "step": 4407, "epoch": 3 }, { "type": "loss", "content": 0.0008757567848078907, "timestamp": "2025-09-30 22:19:18.118391", "step": 4408, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:18.158044", "step": 4408, "epoch": 3 }, { "type": "loss", "content": 0.001239742268808186, "timestamp": "2025-09-30 22:19:18.164705", "step": 4409, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:18.206995", "step": 4409, "epoch": 3 }, { "type": "loss", "content": 0.0006983289495110512, "timestamp": "2025-09-30 22:19:18.213877", "step": 4410, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:18.255830", "step": 4410, "epoch": 3 }, { "type": "loss", "content": 0.00456323241814971, "timestamp": "2025-09-30 22:19:18.263054", "step": 4411, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:19:18.308768", "step": 4411, "epoch": 3 }, { "type": "loss", "content": 0.007104892283678055, "timestamp": "2025-09-30 22:19:18.345529", "step": 4412, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:18.385415", "step": 4412, "epoch": 3 }, { "type": "loss", "content": 0.00333253457210958, "timestamp": "2025-09-30 22:19:18.390423", "step": 4413, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:18.424520", "step": 4413, "epoch": 3 }, { "type": "loss", "content": 0.0020394609309732914, "timestamp": "2025-09-30 22:19:18.431756", "step": 4414, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:18.481909", "step": 4414, "epoch": 3 }, { "type": "loss", "content": 0.003956458065658808, "timestamp": "2025-09-30 22:19:18.492897", "step": 4415, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:18.534671", "step": 4415, "epoch": 3 }, { "type": "loss", "content": 0.0042778183706104755, "timestamp": "2025-09-30 22:19:18.571661", "step": 4416, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:18.607058", "step": 4416, "epoch": 3 }, { "type": "loss", "content": 0.003295444417744875, "timestamp": "2025-09-30 22:19:18.615551", "step": 4417, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:18.654541", "step": 4417, "epoch": 3 }, { "type": "loss", "content": 0.003468771930783987, "timestamp": "2025-09-30 22:19:18.670743", "step": 4418, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:18.719928", "step": 4418, "epoch": 3 }, { "type": "loss", "content": 0.00900344830006361, "timestamp": "2025-09-30 22:19:18.734348", "step": 4419, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:18.783891", "step": 4419, "epoch": 3 }, { "type": "loss", "content": 0.0028677969239652157, "timestamp": "2025-09-30 22:19:18.815088", "step": 4420, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:18.862323", "step": 4420, "epoch": 3 }, { "type": "loss", "content": 0.002005532383918762, "timestamp": "2025-09-30 22:19:18.875823", "step": 4421, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:18.909578", "step": 4421, "epoch": 3 }, { "type": "loss", "content": 0.0024478633422404528, "timestamp": "2025-09-30 22:19:18.922050", "step": 4422, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:18.955193", "step": 4422, "epoch": 3 }, { "type": "loss", "content": 0.007036969996988773, "timestamp": "2025-09-30 22:19:18.967169", "step": 4423, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:19.009048", "step": 4423, "epoch": 3 }, { "type": "loss", "content": 0.009061479941010475, "timestamp": "2025-09-30 22:19:19.039253", "step": 4424, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:19.091977", "step": 4424, "epoch": 3 }, { "type": "loss", "content": 0.002791600301861763, "timestamp": "2025-09-30 22:19:19.107069", "step": 4425, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:19.155531", "step": 4425, "epoch": 3 }, { "type": "loss", "content": 0.007210858631879091, "timestamp": "2025-09-30 22:19:19.163385", "step": 4426, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:19.197677", "step": 4426, "epoch": 3 }, { "type": "loss", "content": 0.0018888312624767423, "timestamp": "2025-09-30 22:19:19.208669", "step": 4427, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:19.266341", "step": 4427, "epoch": 3 }, { "type": "loss", "content": 0.007950611412525177, "timestamp": "2025-09-30 22:19:19.297482", "step": 4428, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:19.351458", "step": 4428, "epoch": 3 }, { "type": "loss", "content": 0.008298023603856564, "timestamp": "2025-09-30 22:19:19.357117", "step": 4429, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:19:19.418515", "step": 4429, "epoch": 3 }, { "type": "loss", "content": 0.005394228268414736, "timestamp": "2025-09-30 22:19:19.435838", "step": 4430, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:19.491633", "step": 4430, "epoch": 3 }, { "type": "loss", "content": 0.000268581003183499, "timestamp": "2025-09-30 22:19:19.504008", "step": 4431, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:19.541651", "step": 4431, "epoch": 3 }, { "type": "loss", "content": 0.002814779058098793, "timestamp": "2025-09-30 22:19:19.570146", "step": 4432, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:19.606405", "step": 4432, "epoch": 3 }, { "type": "loss", "content": 0.006743252277374268, "timestamp": "2025-09-30 22:19:19.616846", "step": 4433, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:19.664633", "step": 4433, "epoch": 3 }, { "type": "loss", "content": 0.002839987864717841, "timestamp": "2025-09-30 22:19:19.672521", "step": 4434, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:19.712066", "step": 4434, "epoch": 3 }, { "type": "loss", "content": 0.0068497913889586926, "timestamp": "2025-09-30 22:19:19.723016", "step": 4435, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:19.780263", "step": 4435, "epoch": 3 }, { "type": "loss", "content": 0.00204927078448236, "timestamp": "2025-09-30 22:19:19.808892", "step": 4436, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:19.865423", "step": 4436, "epoch": 3 }, { "type": "loss", "content": 0.0019519092747941613, "timestamp": "2025-09-30 22:19:19.879232", "step": 4437, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:19.926929", "step": 4437, "epoch": 3 }, { "type": "loss", "content": 0.0030627211090177298, "timestamp": "2025-09-30 22:19:19.937974", "step": 4438, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:19.976075", "step": 4438, "epoch": 3 }, { "type": "loss", "content": 0.003808629233390093, "timestamp": "2025-09-30 22:19:19.987073", "step": 4439, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:20.026474", "step": 4439, "epoch": 3 }, { "type": "loss", "content": 0.004874465521425009, "timestamp": "2025-09-30 22:19:20.061089", "step": 4440, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:20.103790", "step": 4440, "epoch": 3 }, { "type": "loss", "content": 0.00819898210465908, "timestamp": "2025-09-30 22:19:20.112264", "step": 4441, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:20.149733", "step": 4441, "epoch": 3 }, { "type": "loss", "content": 0.007409947458654642, "timestamp": "2025-09-30 22:19:20.160775", "step": 4442, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:20.208537", "step": 4442, "epoch": 3 }, { "type": "loss", "content": 0.0024290422443300486, "timestamp": "2025-09-30 22:19:20.220307", "step": 4443, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:20.261432", "step": 4443, "epoch": 3 }, { "type": "loss", "content": 0.00824358407407999, "timestamp": "2025-09-30 22:19:20.294862", "step": 4444, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:20.333650", "step": 4444, "epoch": 3 }, { "type": "loss", "content": 0.0036941298749297857, "timestamp": "2025-09-30 22:19:20.344272", "step": 4445, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:20.380994", "step": 4445, "epoch": 3 }, { "type": "loss", "content": 0.000923198414966464, "timestamp": "2025-09-30 22:19:20.392046", "step": 4446, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:20.442016", "step": 4446, "epoch": 3 }, { "type": "loss", "content": 0.006019048858433962, "timestamp": "2025-09-30 22:19:20.455848", "step": 4447, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:20.492765", "step": 4447, "epoch": 3 }, { "type": "loss", "content": 0.010113263502717018, "timestamp": "2025-09-30 22:19:20.527319", "step": 4448, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:20.563947", "step": 4448, "epoch": 3 }, { "type": "loss", "content": 0.004297421779483557, "timestamp": "2025-09-30 22:19:20.577071", "step": 4449, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:20.618789", "step": 4449, "epoch": 3 }, { "type": "loss", "content": 0.0028478566091507673, "timestamp": "2025-09-30 22:19:20.633013", "step": 4450, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:20.672329", "step": 4450, "epoch": 3 }, { "type": "loss", "content": 0.003244199324399233, "timestamp": "2025-09-30 22:19:20.686099", "step": 4451, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:20.722592", "step": 4451, "epoch": 3 }, { "type": "loss", "content": 0.004726372193545103, "timestamp": "2025-09-30 22:19:20.754393", "step": 4452, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:20.798280", "step": 4452, "epoch": 3 }, { "type": "loss", "content": 0.004119518678635359, "timestamp": "2025-09-30 22:19:20.813189", "step": 4453, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 640 ], "flops": 18984411776512 }, "timestamp": "2025-09-30 22:19:20.891633", "step": 4453, "epoch": 3 }, { "type": "loss", "content": 0.0023224842734634876, "timestamp": "2025-09-30 22:19:20.913372", "step": 4454, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:20.955799", "step": 4454, "epoch": 3 }, { "type": "loss", "content": 0.0011501448461785913, "timestamp": "2025-09-30 22:19:20.966264", "step": 4455, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:21.008962", "step": 4455, "epoch": 3 }, { "type": "loss", "content": 0.0017262777546420693, "timestamp": "2025-09-30 22:19:21.034266", "step": 4456, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:21.068207", "step": 4456, "epoch": 3 }, { "type": "loss", "content": 0.0025809993967413902, "timestamp": "2025-09-30 22:19:21.082493", "step": 4457, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:21.124441", "step": 4457, "epoch": 3 }, { "type": "loss", "content": 0.0014467294095084071, "timestamp": "2025-09-30 22:19:21.134682", "step": 4458, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:21.177834", "step": 4458, "epoch": 3 }, { "type": "loss", "content": 0.004279765300452709, "timestamp": "2025-09-30 22:19:21.184853", "step": 4459, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:21.224372", "step": 4459, "epoch": 3 }, { "type": "loss", "content": 0.0008534055668860674, "timestamp": "2025-09-30 22:19:21.258934", "step": 4460, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:21.302589", "step": 4460, "epoch": 3 }, { "type": "loss", "content": 0.004186238162219524, "timestamp": "2025-09-30 22:19:21.312027", "step": 4461, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:21.363409", "step": 4461, "epoch": 3 }, { "type": "loss", "content": 0.003403113689273596, "timestamp": "2025-09-30 22:19:21.376764", "step": 4462, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:21.413346", "step": 4462, "epoch": 3 }, { "type": "loss", "content": 0.0028697995003312826, "timestamp": "2025-09-30 22:19:21.428278", "step": 4463, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:21.490774", "step": 4463, "epoch": 3 }, { "type": "loss", "content": 0.007357324473559856, "timestamp": "2025-09-30 22:19:21.525447", "step": 4464, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:21.569338", "step": 4464, "epoch": 3 }, { "type": "loss", "content": 0.004044624045491219, "timestamp": "2025-09-30 22:19:21.582653", "step": 4465, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:21.628087", "step": 4465, "epoch": 3 }, { "type": "loss", "content": 0.0034907220397144556, "timestamp": "2025-09-30 22:19:21.636082", "step": 4466, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:21.674342", "step": 4466, "epoch": 3 }, { "type": "loss", "content": 0.005186514463275671, "timestamp": "2025-09-30 22:19:21.681889", "step": 4467, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:21.724822", "step": 4467, "epoch": 3 }, { "type": "loss", "content": 0.0030226618982851505, "timestamp": "2025-09-30 22:19:21.758221", "step": 4468, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:19:21.798078", "step": 4468, "epoch": 3 }, { "type": "loss", "content": 0.002067551016807556, "timestamp": "2025-09-30 22:19:21.813482", "step": 4469, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:21.861177", "step": 4469, "epoch": 3 }, { "type": "loss", "content": 0.0033264169469475746, "timestamp": "2025-09-30 22:19:21.869032", "step": 4470, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:21.915432", "step": 4470, "epoch": 3 }, { "type": "loss", "content": 0.0025733960792422295, "timestamp": "2025-09-30 22:19:21.928758", "step": 4471, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:21.969013", "step": 4471, "epoch": 3 }, { "type": "loss", "content": 0.049540817737579346, "timestamp": "2025-09-30 22:19:22.002427", "step": 4472, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:22.049690", "step": 4472, "epoch": 3 }, { "type": "loss", "content": 0.005924369674175978, "timestamp": "2025-09-30 22:19:22.060279", "step": 4473, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:22.128334", "step": 4473, "epoch": 3 }, { "type": "loss", "content": 0.0011542935390025377, "timestamp": "2025-09-30 22:19:22.135574", "step": 4474, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 544 ], "flops": 16136789420416 }, "timestamp": "2025-09-30 22:19:22.187271", "step": 4474, "epoch": 3 }, { "type": "loss", "content": 0.0035477534402161837, "timestamp": "2025-09-30 22:19:22.206285", "step": 4475, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:22.262631", "step": 4475, "epoch": 3 }, { "type": "loss", "content": 0.010069957002997398, "timestamp": "2025-09-30 22:19:22.290553", "step": 4476, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:22.331817", "step": 4476, "epoch": 3 }, { "type": "loss", "content": 0.0012309798039495945, "timestamp": "2025-09-30 22:19:22.337509", "step": 4477, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:22.377893", "step": 4477, "epoch": 3 }, { "type": "loss", "content": 0.010741564445197582, "timestamp": "2025-09-30 22:19:22.391732", "step": 4478, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:22.447550", "step": 4478, "epoch": 3 }, { "type": "loss", "content": 0.00607129605486989, "timestamp": "2025-09-30 22:19:22.455257", "step": 4479, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:22.503074", "step": 4479, "epoch": 3 }, { "type": "loss", "content": 0.005703271832317114, "timestamp": "2025-09-30 22:19:22.532020", "step": 4480, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:22.568373", "step": 4480, "epoch": 3 }, { "type": "loss", "content": 0.0015396331436932087, "timestamp": "2025-09-30 22:19:22.578988", "step": 4481, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:22.614057", "step": 4481, "epoch": 3 }, { "type": "loss", "content": 0.003246902721002698, "timestamp": "2025-09-30 22:19:22.624342", "step": 4482, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:22.659338", "step": 4482, "epoch": 3 }, { "type": "loss", "content": 0.004048208240419626, "timestamp": "2025-09-30 22:19:22.670807", "step": 4483, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:22.712806", "step": 4483, "epoch": 3 }, { "type": "loss", "content": 0.008900952525436878, "timestamp": "2025-09-30 22:19:22.744077", "step": 4484, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:22.780796", "step": 4484, "epoch": 3 }, { "type": "loss", "content": 0.0058488743379712105, "timestamp": "2025-09-30 22:19:22.791327", "step": 4485, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:19:25.343183", "step": 4485, "epoch": 3 }, { "type": "pplx", "content": 5.795513532637302, "timestamp": "2025-09-30 22:19:25.355820", "step": 4485, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:25.394515", "step": 4485, "epoch": 3 }, { "type": "loss", "content": 0.002202738542109728, "timestamp": "2025-09-30 22:19:25.400758", "step": 4486, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:25.438632", "step": 4486, "epoch": 3 }, { "type": "loss", "content": 0.001024956232868135, "timestamp": "2025-09-30 22:19:25.449111", "step": 4487, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:25.489632", "step": 4487, "epoch": 3 }, { "type": "loss", "content": 0.001970956800505519, "timestamp": "2025-09-30 22:19:25.523713", "step": 4488, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:25.562660", "step": 4488, "epoch": 3 }, { "type": "loss", "content": 0.010650524869561195, "timestamp": "2025-09-30 22:19:25.571471", "step": 4489, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:25.607493", "step": 4489, "epoch": 3 }, { "type": "loss", "content": 0.003535045078024268, "timestamp": "2025-09-30 22:19:25.617205", "step": 4490, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:25.656034", "step": 4490, "epoch": 3 }, { "type": "loss", "content": 0.0014868737198412418, "timestamp": "2025-09-30 22:19:25.668287", "step": 4491, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:25.707603", "step": 4491, "epoch": 3 }, { "type": "loss", "content": 0.004532901104539633, "timestamp": "2025-09-30 22:19:25.740018", "step": 4492, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:25.777722", "step": 4492, "epoch": 3 }, { "type": "loss", "content": 0.009004509076476097, "timestamp": "2025-09-30 22:19:25.785619", "step": 4493, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:25.845473", "step": 4493, "epoch": 3 }, { "type": "loss", "content": 0.001528367749415338, "timestamp": "2025-09-30 22:19:25.856924", "step": 4494, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:25.910174", "step": 4494, "epoch": 3 }, { "type": "loss", "content": 0.004119920544326305, "timestamp": "2025-09-30 22:19:25.925795", "step": 4495, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:25.972438", "step": 4495, "epoch": 3 }, { "type": "loss", "content": 0.005202796310186386, "timestamp": "2025-09-30 22:19:26.000510", "step": 4496, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:26.061552", "step": 4496, "epoch": 3 }, { "type": "loss", "content": 0.002058766083791852, "timestamp": "2025-09-30 22:19:26.074841", "step": 4497, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:19:26.141053", "step": 4497, "epoch": 3 }, { "type": "loss", "content": 0.00176495430059731, "timestamp": "2025-09-30 22:19:26.158449", "step": 4498, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:26.202911", "step": 4498, "epoch": 3 }, { "type": "loss", "content": 0.008682888932526112, "timestamp": "2025-09-30 22:19:26.215560", "step": 4499, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:26.252182", "step": 4499, "epoch": 3 }, { "type": "loss", "content": 0.00501663563773036, "timestamp": "2025-09-30 22:19:26.283402", "step": 4500, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 4500", "timestamp": "2025-09-30 22:19:31.309914", "step": 4500, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:31.359179", "step": 4500, "epoch": 3 }, { "type": "loss", "content": 0.003126199124380946, "timestamp": "2025-09-30 22:19:31.372107", "step": 4501, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:31.406558", "step": 4501, "epoch": 3 }, { "type": "loss", "content": 0.01422073319554329, "timestamp": "2025-09-30 22:19:31.417430", "step": 4502, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:31.463988", "step": 4502, "epoch": 3 }, { "type": "loss", "content": 0.004692488815635443, "timestamp": "2025-09-30 22:19:31.471643", "step": 4503, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:31.520737", "step": 4503, "epoch": 3 }, { "type": "loss", "content": 0.0015276926569640636, "timestamp": "2025-09-30 22:19:31.555586", "step": 4504, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:31.594486", "step": 4504, "epoch": 3 }, { "type": "loss", "content": 0.004904737696051598, "timestamp": "2025-09-30 22:19:31.605122", "step": 4505, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:31.644218", "step": 4505, "epoch": 3 }, { "type": "loss", "content": 0.005495449062436819, "timestamp": "2025-09-30 22:19:31.655271", "step": 4506, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:31.697492", "step": 4506, "epoch": 3 }, { "type": "loss", "content": 0.008430728688836098, "timestamp": "2025-09-30 22:19:31.711452", "step": 4507, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:31.756307", "step": 4507, "epoch": 3 }, { "type": "loss", "content": 0.007869812659919262, "timestamp": "2025-09-30 22:19:31.784700", "step": 4508, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:31.830203", "step": 4508, "epoch": 3 }, { "type": "loss", "content": 0.002214880893006921, "timestamp": "2025-09-30 22:19:31.840630", "step": 4509, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:31.878999", "step": 4509, "epoch": 3 }, { "type": "loss", "content": 0.0030361057724803686, "timestamp": "2025-09-30 22:19:31.886720", "step": 4510, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:31.926991", "step": 4510, "epoch": 3 }, { "type": "loss", "content": 0.002759290626272559, "timestamp": "2025-09-30 22:19:31.934745", "step": 4511, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:31.973961", "step": 4511, "epoch": 3 }, { "type": "loss", "content": 0.0018279353389516473, "timestamp": "2025-09-30 22:19:32.005389", "step": 4512, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:32.045766", "step": 4512, "epoch": 3 }, { "type": "loss", "content": 0.011083441786468029, "timestamp": "2025-09-30 22:19:32.051414", "step": 4513, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:32.093994", "step": 4513, "epoch": 3 }, { "type": "loss", "content": 0.0008053503697738051, "timestamp": "2025-09-30 22:19:32.106568", "step": 4514, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:32.146869", "step": 4514, "epoch": 3 }, { "type": "loss", "content": 0.0013490010751411319, "timestamp": "2025-09-30 22:19:32.159453", "step": 4515, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:32.228080", "step": 4515, "epoch": 3 }, { "type": "loss", "content": 0.003745648544281721, "timestamp": "2025-09-30 22:19:32.260156", "step": 4516, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:32.311933", "step": 4516, "epoch": 3 }, { "type": "loss", "content": 0.0028096225578337908, "timestamp": "2025-09-30 22:19:32.318028", "step": 4517, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:32.360083", "step": 4517, "epoch": 3 }, { "type": "loss", "content": 0.004467432387173176, "timestamp": "2025-09-30 22:19:32.371190", "step": 4518, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:32.416700", "step": 4518, "epoch": 3 }, { "type": "loss", "content": 0.00493756914511323, "timestamp": "2025-09-30 22:19:32.430430", "step": 4519, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:32.478984", "step": 4519, "epoch": 3 }, { "type": "loss", "content": 0.004701991565525532, "timestamp": "2025-09-30 22:19:32.510094", "step": 4520, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:32.557141", "step": 4520, "epoch": 3 }, { "type": "loss", "content": 0.008878360502421856, "timestamp": "2025-09-30 22:19:32.562893", "step": 4521, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:32.597872", "step": 4521, "epoch": 3 }, { "type": "loss", "content": 0.003462841734290123, "timestamp": "2025-09-30 22:19:32.608997", "step": 4522, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:32.665017", "step": 4522, "epoch": 3 }, { "type": "loss", "content": 0.015827376395463943, "timestamp": "2025-09-30 22:19:32.675269", "step": 4523, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:32.711444", "step": 4523, "epoch": 3 }, { "type": "loss", "content": 0.003019185969606042, "timestamp": "2025-09-30 22:19:32.744561", "step": 4524, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:32.789171", "step": 4524, "epoch": 3 }, { "type": "loss", "content": 0.0021726477425545454, "timestamp": "2025-09-30 22:19:32.797833", "step": 4525, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:32.837458", "step": 4525, "epoch": 3 }, { "type": "loss", "content": 0.0042687393724918365, "timestamp": "2025-09-30 22:19:32.845431", "step": 4526, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:32.882653", "step": 4526, "epoch": 3 }, { "type": "loss", "content": 0.004522709175944328, "timestamp": "2025-09-30 22:19:32.889794", "step": 4527, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:32.922349", "step": 4527, "epoch": 3 }, { "type": "loss", "content": 0.005108777899295092, "timestamp": "2025-09-30 22:19:32.950849", "step": 4528, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:32.988486", "step": 4528, "epoch": 3 }, { "type": "loss", "content": 0.0053230454213917255, "timestamp": "2025-09-30 22:19:32.998325", "step": 4529, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:33.032370", "step": 4529, "epoch": 3 }, { "type": "loss", "content": 0.005572678055614233, "timestamp": "2025-09-30 22:19:33.044810", "step": 4530, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:33.085025", "step": 4530, "epoch": 3 }, { "type": "loss", "content": 0.003167995484545827, "timestamp": "2025-09-30 22:19:33.098737", "step": 4531, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:33.137690", "step": 4531, "epoch": 3 }, { "type": "loss", "content": 0.0014535030350089073, "timestamp": "2025-09-30 22:19:33.166124", "step": 4532, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:33.200752", "step": 4532, "epoch": 3 }, { "type": "loss", "content": 0.010079230181872845, "timestamp": "2025-09-30 22:19:33.206397", "step": 4533, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:33.247966", "step": 4533, "epoch": 3 }, { "type": "loss", "content": 0.0051791672594845295, "timestamp": "2025-09-30 22:19:33.258505", "step": 4534, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:33.297368", "step": 4534, "epoch": 3 }, { "type": "loss", "content": 0.005085056647658348, "timestamp": "2025-09-30 22:19:33.308492", "step": 4535, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:33.348339", "step": 4535, "epoch": 3 }, { "type": "loss", "content": 0.0078585809096694, "timestamp": "2025-09-30 22:19:33.377208", "step": 4536, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-30 22:19:33.423072", "step": 4536, "epoch": 3 }, { "type": "loss", "content": 0.004234223160892725, "timestamp": "2025-09-30 22:19:33.442306", "step": 4537, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:33.481406", "step": 4537, "epoch": 3 }, { "type": "loss", "content": 0.002334218705072999, "timestamp": "2025-09-30 22:19:33.493957", "step": 4538, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:33.531691", "step": 4538, "epoch": 3 }, { "type": "loss", "content": 0.011637231335043907, "timestamp": "2025-09-30 22:19:33.545518", "step": 4539, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:33.584546", "step": 4539, "epoch": 3 }, { "type": "loss", "content": 0.0036545591428875923, "timestamp": "2025-09-30 22:19:33.618787", "step": 4540, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:33.658187", "step": 4540, "epoch": 3 }, { "type": "loss", "content": 0.0018542211037129164, "timestamp": "2025-09-30 22:19:33.667590", "step": 4541, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:33.719858", "step": 4541, "epoch": 3 }, { "type": "loss", "content": 0.0016864044591784477, "timestamp": "2025-09-30 22:19:33.733520", "step": 4542, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:33.767590", "step": 4542, "epoch": 3 }, { "type": "loss", "content": 0.0020412022713571787, "timestamp": "2025-09-30 22:19:33.779761", "step": 4543, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:33.825168", "step": 4543, "epoch": 3 }, { "type": "loss", "content": 0.004759989213198423, "timestamp": "2025-09-30 22:19:33.858343", "step": 4544, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:33.898086", "step": 4544, "epoch": 3 }, { "type": "loss", "content": 0.00546990055590868, "timestamp": "2025-09-30 22:19:33.908074", "step": 4545, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:33.962281", "step": 4545, "epoch": 3 }, { "type": "loss", "content": 0.00273419008590281, "timestamp": "2025-09-30 22:19:33.974656", "step": 4546, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:34.016467", "step": 4546, "epoch": 3 }, { "type": "loss", "content": 0.010769542306661606, "timestamp": "2025-09-30 22:19:34.030472", "step": 4547, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:34.074285", "step": 4547, "epoch": 3 }, { "type": "loss", "content": 0.0031620825175195932, "timestamp": "2025-09-30 22:19:34.103023", "step": 4548, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:34.147515", "step": 4548, "epoch": 3 }, { "type": "loss", "content": 0.003730887547135353, "timestamp": "2025-09-30 22:19:34.163035", "step": 4549, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:34.203874", "step": 4549, "epoch": 3 }, { "type": "loss", "content": 0.002586633199825883, "timestamp": "2025-09-30 22:19:34.216436", "step": 4550, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:34.255486", "step": 4550, "epoch": 3 }, { "type": "loss", "content": 0.0012792575871571898, "timestamp": "2025-09-30 22:19:34.263083", "step": 4551, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:34.302280", "step": 4551, "epoch": 3 }, { "type": "loss", "content": 0.003631437197327614, "timestamp": "2025-09-30 22:19:34.331090", "step": 4552, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:34.368618", "step": 4552, "epoch": 3 }, { "type": "loss", "content": 0.004950222093611956, "timestamp": "2025-09-30 22:19:34.379083", "step": 4553, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:34.422989", "step": 4553, "epoch": 3 }, { "type": "loss", "content": 0.0036054716911166906, "timestamp": "2025-09-30 22:19:34.436797", "step": 4554, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:34.477069", "step": 4554, "epoch": 3 }, { "type": "loss", "content": 0.0020827697589993477, "timestamp": "2025-09-30 22:19:34.489445", "step": 4555, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:34.525800", "step": 4555, "epoch": 3 }, { "type": "loss", "content": 0.003716694889590144, "timestamp": "2025-09-30 22:19:34.559014", "step": 4556, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:34.600518", "step": 4556, "epoch": 3 }, { "type": "loss", "content": 0.0007190012838691473, "timestamp": "2025-09-30 22:19:34.610510", "step": 4557, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:34.657518", "step": 4557, "epoch": 3 }, { "type": "loss", "content": 0.005343511700630188, "timestamp": "2025-09-30 22:19:34.669788", "step": 4558, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:34.713873", "step": 4558, "epoch": 3 }, { "type": "loss", "content": 0.002123891608789563, "timestamp": "2025-09-30 22:19:34.724974", "step": 4559, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:34.765335", "step": 4559, "epoch": 3 }, { "type": "loss", "content": 0.0020689836237579584, "timestamp": "2025-09-30 22:19:34.793883", "step": 4560, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:34.832596", "step": 4560, "epoch": 3 }, { "type": "loss", "content": 0.005065195262432098, "timestamp": "2025-09-30 22:19:34.845951", "step": 4561, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:34.884767", "step": 4561, "epoch": 3 }, { "type": "loss", "content": 0.00638920022174716, "timestamp": "2025-09-30 22:19:34.898497", "step": 4562, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:34.937061", "step": 4562, "epoch": 3 }, { "type": "loss", "content": 0.0024130765814334154, "timestamp": "2025-09-30 22:19:34.949688", "step": 4563, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:34.991363", "step": 4563, "epoch": 3 }, { "type": "loss", "content": 0.009124759584665298, "timestamp": "2025-09-30 22:19:35.025565", "step": 4564, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:35.064008", "step": 4564, "epoch": 3 }, { "type": "loss", "content": 0.0013621627585962415, "timestamp": "2025-09-30 22:19:35.073866", "step": 4565, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:35.115166", "step": 4565, "epoch": 3 }, { "type": "loss", "content": 0.005780582316219807, "timestamp": "2025-09-30 22:19:35.125457", "step": 4566, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:35.161516", "step": 4566, "epoch": 3 }, { "type": "loss", "content": 0.004457356408238411, "timestamp": "2025-09-30 22:19:35.168562", "step": 4567, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:35.218344", "step": 4567, "epoch": 3 }, { "type": "loss", "content": 0.000518388522323221, "timestamp": "2025-09-30 22:19:35.253237", "step": 4568, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:35.294771", "step": 4568, "epoch": 3 }, { "type": "loss", "content": 0.005421688314527273, "timestamp": "2025-09-30 22:19:35.304611", "step": 4569, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:35.346315", "step": 4569, "epoch": 3 }, { "type": "loss", "content": 0.0036547333002090454, "timestamp": "2025-09-30 22:19:35.360070", "step": 4570, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:35.396271", "step": 4570, "epoch": 3 }, { "type": "loss", "content": 0.007334040943533182, "timestamp": "2025-09-30 22:19:35.408659", "step": 4571, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:35.446802", "step": 4571, "epoch": 3 }, { "type": "loss", "content": 0.0014599317219108343, "timestamp": "2025-09-30 22:19:35.478668", "step": 4572, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:35.514337", "step": 4572, "epoch": 3 }, { "type": "loss", "content": 0.0040939245373010635, "timestamp": "2025-09-30 22:19:35.522476", "step": 4573, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:35.564827", "step": 4573, "epoch": 3 }, { "type": "loss", "content": 0.0051482319831848145, "timestamp": "2025-09-30 22:19:35.578547", "step": 4574, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:35.622731", "step": 4574, "epoch": 3 }, { "type": "loss", "content": 0.002121998928487301, "timestamp": "2025-09-30 22:19:35.635040", "step": 4575, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:35.684360", "step": 4575, "epoch": 3 }, { "type": "loss", "content": 0.00047555830678902566, "timestamp": "2025-09-30 22:19:35.720822", "step": 4576, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:35.760202", "step": 4576, "epoch": 3 }, { "type": "loss", "content": 0.0017047971487045288, "timestamp": "2025-09-30 22:19:35.770457", "step": 4577, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:35.810577", "step": 4577, "epoch": 3 }, { "type": "loss", "content": 0.0023253338877111673, "timestamp": "2025-09-30 22:19:35.822895", "step": 4578, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:35.856316", "step": 4578, "epoch": 3 }, { "type": "loss", "content": 0.002054597483947873, "timestamp": "2025-09-30 22:19:35.870507", "step": 4579, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:35.908699", "step": 4579, "epoch": 3 }, { "type": "loss", "content": 0.0006414930685423315, "timestamp": "2025-09-30 22:19:35.936668", "step": 4580, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:35.971631", "step": 4580, "epoch": 3 }, { "type": "loss", "content": 0.0006736897048540413, "timestamp": "2025-09-30 22:19:35.976553", "step": 4581, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:36.021701", "step": 4581, "epoch": 3 }, { "type": "loss", "content": 0.002001025015488267, "timestamp": "2025-09-30 22:19:36.032984", "step": 4582, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:36.067324", "step": 4582, "epoch": 3 }, { "type": "loss", "content": 0.0002340712962904945, "timestamp": "2025-09-30 22:19:36.077736", "step": 4583, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:36.112009", "step": 4583, "epoch": 3 }, { "type": "loss", "content": 0.0006811887142248452, "timestamp": "2025-09-30 22:19:36.143946", "step": 4584, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:36.184921", "step": 4584, "epoch": 3 }, { "type": "loss", "content": 0.0010683821747079492, "timestamp": "2025-09-30 22:19:36.193513", "step": 4585, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:36.235100", "step": 4585, "epoch": 3 }, { "type": "loss", "content": 0.005461221560835838, "timestamp": "2025-09-30 22:19:36.248455", "step": 4586, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:36.281608", "step": 4586, "epoch": 3 }, { "type": "loss", "content": 0.0006103842169977725, "timestamp": "2025-09-30 22:19:36.292621", "step": 4587, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:36.333461", "step": 4587, "epoch": 3 }, { "type": "loss", "content": 0.0017355261370539665, "timestamp": "2025-09-30 22:19:36.360914", "step": 4588, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:36.397875", "step": 4588, "epoch": 3 }, { "type": "loss", "content": 0.0022532320581376553, "timestamp": "2025-09-30 22:19:36.406639", "step": 4589, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:36.455523", "step": 4589, "epoch": 3 }, { "type": "loss", "content": 0.004017478786408901, "timestamp": "2025-09-30 22:19:36.466679", "step": 4590, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:36.505417", "step": 4590, "epoch": 3 }, { "type": "loss", "content": 0.0012854663655161858, "timestamp": "2025-09-30 22:19:36.515902", "step": 4591, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:36.579035", "step": 4591, "epoch": 3 }, { "type": "loss", "content": 0.008219798095524311, "timestamp": "2025-09-30 22:19:36.608591", "step": 4592, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:36.657192", "step": 4592, "epoch": 3 }, { "type": "loss", "content": 0.0021332986652851105, "timestamp": "2025-09-30 22:19:36.666962", "step": 4593, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:36.715361", "step": 4593, "epoch": 3 }, { "type": "loss", "content": 0.0011237307917326689, "timestamp": "2025-09-30 22:19:36.719861", "step": 4594, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:36.763505", "step": 4594, "epoch": 3 }, { "type": "loss", "content": 0.0002990429929923266, "timestamp": "2025-09-30 22:19:36.771576", "step": 4595, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:36.814773", "step": 4595, "epoch": 3 }, { "type": "loss", "content": 0.003179828403517604, "timestamp": "2025-09-30 22:19:36.840896", "step": 4596, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:36.881684", "step": 4596, "epoch": 3 }, { "type": "loss", "content": 0.0031162381637841463, "timestamp": "2025-09-30 22:19:36.886663", "step": 4597, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:36.929348", "step": 4597, "epoch": 3 }, { "type": "loss", "content": 0.000504596158862114, "timestamp": "2025-09-30 22:19:36.936979", "step": 4598, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:36.989186", "step": 4598, "epoch": 3 }, { "type": "loss", "content": 0.0007450035191141069, "timestamp": "2025-09-30 22:19:36.997008", "step": 4599, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:37.033174", "step": 4599, "epoch": 3 }, { "type": "loss", "content": 0.00017571434727869928, "timestamp": "2025-09-30 22:19:37.064481", "step": 4600, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:19:39.766720", "step": 4600, "epoch": 3 }, { "type": "pplx", "content": 5.656691005321658, "timestamp": "2025-09-30 22:19:39.775260", "step": 4600, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:39.805167", "step": 4600, "epoch": 3 }, { "type": "loss", "content": 0.0016262733843177557, "timestamp": "2025-09-30 22:19:39.812645", "step": 4601, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:39.857086", "step": 4601, "epoch": 3 }, { "type": "loss", "content": 0.0020167173352092505, "timestamp": "2025-09-30 22:19:39.867368", "step": 4602, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:19:39.912503", "step": 4602, "epoch": 3 }, { "type": "loss", "content": 0.005614968482404947, "timestamp": "2025-09-30 22:19:39.930231", "step": 4603, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:39.968839", "step": 4603, "epoch": 3 }, { "type": "loss", "content": 0.005495783872902393, "timestamp": "2025-09-30 22:19:40.002221", "step": 4604, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:40.046830", "step": 4604, "epoch": 3 }, { "type": "loss", "content": 0.018731797114014626, "timestamp": "2025-09-30 22:19:40.059484", "step": 4605, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:40.103731", "step": 4605, "epoch": 3 }, { "type": "loss", "content": 0.0006927954382263124, "timestamp": "2025-09-30 22:19:40.113991", "step": 4606, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:40.152335", "step": 4606, "epoch": 3 }, { "type": "loss", "content": 0.0013639118988066912, "timestamp": "2025-09-30 22:19:40.162699", "step": 4607, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:40.197857", "step": 4607, "epoch": 3 }, { "type": "loss", "content": 0.0010228747269138694, "timestamp": "2025-09-30 22:19:40.233554", "step": 4608, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:40.280263", "step": 4608, "epoch": 3 }, { "type": "loss", "content": 0.0013792128302156925, "timestamp": "2025-09-30 22:19:40.288248", "step": 4609, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:40.323107", "step": 4609, "epoch": 3 }, { "type": "loss", "content": 0.002117737429216504, "timestamp": "2025-09-30 22:19:40.335460", "step": 4610, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:19:40.381733", "step": 4610, "epoch": 3 }, { "type": "loss", "content": 0.0012078828876838088, "timestamp": "2025-09-30 22:19:40.398857", "step": 4611, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:40.437781", "step": 4611, "epoch": 3 }, { "type": "loss", "content": 0.002663145773112774, "timestamp": "2025-09-30 22:19:40.469111", "step": 4612, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:40.503691", "step": 4612, "epoch": 3 }, { "type": "loss", "content": 0.001201400882564485, "timestamp": "2025-09-30 22:19:40.508981", "step": 4613, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:40.543120", "step": 4613, "epoch": 3 }, { "type": "loss", "content": 0.0009282511891797185, "timestamp": "2025-09-30 22:19:40.550316", "step": 4614, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:40.589720", "step": 4614, "epoch": 3 }, { "type": "loss", "content": 0.005988210439682007, "timestamp": "2025-09-30 22:19:40.600119", "step": 4615, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:40.644818", "step": 4615, "epoch": 3 }, { "type": "loss", "content": 7.458464096998796e-05, "timestamp": "2025-09-30 22:19:40.670151", "step": 4616, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:40.713428", "step": 4616, "epoch": 3 }, { "type": "loss", "content": 0.0013050598790869117, "timestamp": "2025-09-30 22:19:40.718264", "step": 4617, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:40.770462", "step": 4617, "epoch": 3 }, { "type": "loss", "content": 0.0040612309239804745, "timestamp": "2025-09-30 22:19:40.784326", "step": 4618, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:40.841919", "step": 4618, "epoch": 3 }, { "type": "loss", "content": 0.000192474210052751, "timestamp": "2025-09-30 22:19:40.858243", "step": 4619, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:40.891975", "step": 4619, "epoch": 3 }, { "type": "loss", "content": 0.00040366427856497467, "timestamp": "2025-09-30 22:19:40.920797", "step": 4620, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:40.961687", "step": 4620, "epoch": 3 }, { "type": "loss", "content": 0.005431856960058212, "timestamp": "2025-09-30 22:19:40.974238", "step": 4621, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:41.023215", "step": 4621, "epoch": 3 }, { "type": "loss", "content": 0.009540246799588203, "timestamp": "2025-09-30 22:19:41.028508", "step": 4622, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:41.068857", "step": 4622, "epoch": 3 }, { "type": "loss", "content": 0.00016501954814884812, "timestamp": "2025-09-30 22:19:41.076044", "step": 4623, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:41.131513", "step": 4623, "epoch": 3 }, { "type": "loss", "content": 0.0013068681582808495, "timestamp": "2025-09-30 22:19:41.165789", "step": 4624, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:19:41.206596", "step": 4624, "epoch": 3 }, { "type": "loss", "content": 0.005706985015422106, "timestamp": "2025-09-30 22:19:41.221978", "step": 4625, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:41.282173", "step": 4625, "epoch": 3 }, { "type": "loss", "content": 0.002301833126693964, "timestamp": "2025-09-30 22:19:41.292744", "step": 4626, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:41.342512", "step": 4626, "epoch": 3 }, { "type": "loss", "content": 0.010077630169689655, "timestamp": "2025-09-30 22:19:41.356219", "step": 4627, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:19:41.408316", "step": 4627, "epoch": 3 }, { "type": "loss", "content": 0.003587552113458514, "timestamp": "2025-09-30 22:19:41.446230", "step": 4628, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:41.485501", "step": 4628, "epoch": 3 }, { "type": "loss", "content": 0.002079867059364915, "timestamp": "2025-09-30 22:19:41.495288", "step": 4629, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:41.534082", "step": 4629, "epoch": 3 }, { "type": "loss", "content": 0.005988697987049818, "timestamp": "2025-09-30 22:19:41.546494", "step": 4630, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:41.589623", "step": 4630, "epoch": 3 }, { "type": "loss", "content": 0.0020013691391795874, "timestamp": "2025-09-30 22:19:41.603360", "step": 4631, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:41.638617", "step": 4631, "epoch": 3 }, { "type": "loss", "content": 0.0010503423400223255, "timestamp": "2025-09-30 22:19:41.669780", "step": 4632, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:41.707403", "step": 4632, "epoch": 3 }, { "type": "loss", "content": 0.002322463784366846, "timestamp": "2025-09-30 22:19:41.715325", "step": 4633, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:41.760474", "step": 4633, "epoch": 3 }, { "type": "loss", "content": 0.0016447566449642181, "timestamp": "2025-09-30 22:19:41.776142", "step": 4634, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:41.810510", "step": 4634, "epoch": 3 }, { "type": "loss", "content": 0.003105347277596593, "timestamp": "2025-09-30 22:19:41.823078", "step": 4635, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:41.860386", "step": 4635, "epoch": 3 }, { "type": "loss", "content": 0.004947391804307699, "timestamp": "2025-09-30 22:19:41.895038", "step": 4636, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:41.950048", "step": 4636, "epoch": 3 }, { "type": "loss", "content": 0.0003328040475025773, "timestamp": "2025-09-30 22:19:41.959936", "step": 4637, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:41.993101", "step": 4637, "epoch": 3 }, { "type": "loss", "content": 0.0006075861747376621, "timestamp": "2025-09-30 22:19:42.000545", "step": 4638, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:42.057172", "step": 4638, "epoch": 3 }, { "type": "loss", "content": 0.0014844061806797981, "timestamp": "2025-09-30 22:19:42.064867", "step": 4639, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:42.106063", "step": 4639, "epoch": 3 }, { "type": "loss", "content": 0.005587319377809763, "timestamp": "2025-09-30 22:19:42.133826", "step": 4640, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:42.183493", "step": 4640, "epoch": 3 }, { "type": "loss", "content": 0.0011380692012608051, "timestamp": "2025-09-30 22:19:42.188688", "step": 4641, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:42.238509", "step": 4641, "epoch": 3 }, { "type": "loss", "content": 0.0002756123139988631, "timestamp": "2025-09-30 22:19:42.249595", "step": 4642, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:42.316745", "step": 4642, "epoch": 3 }, { "type": "loss", "content": 0.0017162506701424718, "timestamp": "2025-09-30 22:19:42.328400", "step": 4643, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:42.378517", "step": 4643, "epoch": 3 }, { "type": "loss", "content": 0.0008271224214695394, "timestamp": "2025-09-30 22:19:42.410499", "step": 4644, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:42.446342", "step": 4644, "epoch": 3 }, { "type": "loss", "content": 0.0012134123826399446, "timestamp": "2025-09-30 22:19:42.456770", "step": 4645, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:42.496616", "step": 4645, "epoch": 3 }, { "type": "loss", "content": 0.007950005121529102, "timestamp": "2025-09-30 22:19:42.507694", "step": 4646, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:42.549047", "step": 4646, "epoch": 3 }, { "type": "loss", "content": 0.0023825380485504866, "timestamp": "2025-09-30 22:19:42.561643", "step": 4647, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:42.602439", "step": 4647, "epoch": 3 }, { "type": "loss", "content": 0.0019718753173947334, "timestamp": "2025-09-30 22:19:42.632142", "step": 4648, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:42.673672", "step": 4648, "epoch": 3 }, { "type": "loss", "content": 0.014634182676672935, "timestamp": "2025-09-30 22:19:42.682308", "step": 4649, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:42.716078", "step": 4649, "epoch": 3 }, { "type": "loss", "content": 0.0015215821331366897, "timestamp": "2025-09-30 22:19:42.723182", "step": 4650, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:42.767185", "step": 4650, "epoch": 3 }, { "type": "loss", "content": 0.007147200405597687, "timestamp": "2025-09-30 22:19:42.778329", "step": 4651, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:42.820664", "step": 4651, "epoch": 3 }, { "type": "loss", "content": 5.3520649089477956e-05, "timestamp": "2025-09-30 22:19:42.852633", "step": 4652, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:42.891055", "step": 4652, "epoch": 3 }, { "type": "loss", "content": 0.0012179957702755928, "timestamp": "2025-09-30 22:19:42.904463", "step": 4653, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:42.946556", "step": 4653, "epoch": 3 }, { "type": "loss", "content": 0.010880755260586739, "timestamp": "2025-09-30 22:19:42.958239", "step": 4654, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:43.005839", "step": 4654, "epoch": 3 }, { "type": "loss", "content": 0.001264199847355485, "timestamp": "2025-09-30 22:19:43.012997", "step": 4655, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:43.059069", "step": 4655, "epoch": 3 }, { "type": "loss", "content": 0.001374272396788001, "timestamp": "2025-09-30 22:19:43.093699", "step": 4656, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:43.131002", "step": 4656, "epoch": 3 }, { "type": "loss", "content": 0.0026250374503433704, "timestamp": "2025-09-30 22:19:43.143605", "step": 4657, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:43.183035", "step": 4657, "epoch": 3 }, { "type": "loss", "content": 0.001169042312540114, "timestamp": "2025-09-30 22:19:43.195286", "step": 4658, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:43.229861", "step": 4658, "epoch": 3 }, { "type": "loss", "content": 0.0010033926228061318, "timestamp": "2025-09-30 22:19:43.242385", "step": 4659, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:43.281345", "step": 4659, "epoch": 3 }, { "type": "loss", "content": 0.007937717251479626, "timestamp": "2025-09-30 22:19:43.315966", "step": 4660, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-30 22:19:43.367063", "step": 4660, "epoch": 3 }, { "type": "loss", "content": 0.002611410105600953, "timestamp": "2025-09-30 22:19:43.386394", "step": 4661, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:43.428308", "step": 4661, "epoch": 3 }, { "type": "loss", "content": 0.001537830918096006, "timestamp": "2025-09-30 22:19:43.443976", "step": 4662, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:43.484425", "step": 4662, "epoch": 3 }, { "type": "loss", "content": 0.0025844555348157883, "timestamp": "2025-09-30 22:19:43.495595", "step": 4663, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:43.539062", "step": 4663, "epoch": 3 }, { "type": "loss", "content": 0.007824945263564587, "timestamp": "2025-09-30 22:19:43.573655", "step": 4664, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:43.614690", "step": 4664, "epoch": 3 }, { "type": "loss", "content": 0.0009558065794408321, "timestamp": "2025-09-30 22:19:43.623966", "step": 4665, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:43.663348", "step": 4665, "epoch": 3 }, { "type": "loss", "content": 0.01655222289264202, "timestamp": "2025-09-30 22:19:43.675841", "step": 4666, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:43.719324", "step": 4666, "epoch": 3 }, { "type": "loss", "content": 0.0012177954195067286, "timestamp": "2025-09-30 22:19:43.734899", "step": 4667, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:43.786009", "step": 4667, "epoch": 3 }, { "type": "loss", "content": 0.0031382772140204906, "timestamp": "2025-09-30 22:19:43.820731", "step": 4668, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:43.860008", "step": 4668, "epoch": 3 }, { "type": "loss", "content": 0.0005979533889330924, "timestamp": "2025-09-30 22:19:43.868549", "step": 4669, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:43.920216", "step": 4669, "epoch": 3 }, { "type": "loss", "content": 0.0010151851456612349, "timestamp": "2025-09-30 22:19:43.933517", "step": 4670, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:43.970539", "step": 4670, "epoch": 3 }, { "type": "loss", "content": 0.0009803612483665347, "timestamp": "2025-09-30 22:19:43.977699", "step": 4671, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:44.017376", "step": 4671, "epoch": 3 }, { "type": "loss", "content": 0.0012478310381993651, "timestamp": "2025-09-30 22:19:44.048548", "step": 4672, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:44.087850", "step": 4672, "epoch": 3 }, { "type": "loss", "content": 0.008808085694909096, "timestamp": "2025-09-30 22:19:44.099007", "step": 4673, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:44.134380", "step": 4673, "epoch": 3 }, { "type": "loss", "content": 0.0010443759383633733, "timestamp": "2025-09-30 22:19:44.141579", "step": 4674, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:44.188309", "step": 4674, "epoch": 3 }, { "type": "loss", "content": 0.002289071911945939, "timestamp": "2025-09-30 22:19:44.199260", "step": 4675, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 544 ], "flops": 16136789420416 }, "timestamp": "2025-09-30 22:19:44.248904", "step": 4675, "epoch": 3 }, { "type": "loss", "content": 0.0038310252130031586, "timestamp": "2025-09-30 22:19:44.288856", "step": 4676, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:44.338180", "step": 4676, "epoch": 3 }, { "type": "loss", "content": 0.0003230969305150211, "timestamp": "2025-09-30 22:19:44.346774", "step": 4677, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:44.384951", "step": 4677, "epoch": 3 }, { "type": "loss", "content": 0.00023587503528688103, "timestamp": "2025-09-30 22:19:44.392194", "step": 4678, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:19:44.440927", "step": 4678, "epoch": 3 }, { "type": "loss", "content": 0.0006135299918241799, "timestamp": "2025-09-30 22:19:44.457240", "step": 4679, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:44.506173", "step": 4679, "epoch": 3 }, { "type": "loss", "content": 0.0010174872586503625, "timestamp": "2025-09-30 22:19:44.535051", "step": 4680, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:44.569337", "step": 4680, "epoch": 3 }, { "type": "loss", "content": 0.0034730613697320223, "timestamp": "2025-09-30 22:19:44.579241", "step": 4681, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:44.613427", "step": 4681, "epoch": 3 }, { "type": "loss", "content": 0.003659060224890709, "timestamp": "2025-09-30 22:19:44.621172", "step": 4682, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:44.665480", "step": 4682, "epoch": 3 }, { "type": "loss", "content": 0.004465331789106131, "timestamp": "2025-09-30 22:19:44.673396", "step": 4683, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:44.710044", "step": 4683, "epoch": 3 }, { "type": "loss", "content": 0.0053755613043904305, "timestamp": "2025-09-30 22:19:44.744291", "step": 4684, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:44.777990", "step": 4684, "epoch": 3 }, { "type": "loss", "content": 0.001365147065371275, "timestamp": "2025-09-30 22:19:44.783298", "step": 4685, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:44.826299", "step": 4685, "epoch": 3 }, { "type": "loss", "content": 0.0025931692216545343, "timestamp": "2025-09-30 22:19:44.837848", "step": 4686, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:44.879541", "step": 4686, "epoch": 3 }, { "type": "loss", "content": 0.004931640345603228, "timestamp": "2025-09-30 22:19:44.891238", "step": 4687, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:44.933341", "step": 4687, "epoch": 3 }, { "type": "loss", "content": 0.004364571999758482, "timestamp": "2025-09-30 22:19:44.965204", "step": 4688, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:45.003072", "step": 4688, "epoch": 3 }, { "type": "loss", "content": 0.00468175346031785, "timestamp": "2025-09-30 22:19:45.005771", "step": 4689, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:45.047548", "step": 4689, "epoch": 3 }, { "type": "loss", "content": 0.0012734520714730024, "timestamp": "2025-09-30 22:19:45.058551", "step": 4690, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:19:45.092072", "step": 4690, "epoch": 3 }, { "type": "loss", "content": 0.002087516477331519, "timestamp": "2025-09-30 22:19:45.101670", "step": 4691, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:45.136529", "step": 4691, "epoch": 3 }, { "type": "loss", "content": 0.0008861172827892005, "timestamp": "2025-09-30 22:19:45.166658", "step": 4692, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:45.204268", "step": 4692, "epoch": 3 }, { "type": "loss", "content": 0.0002589194045867771, "timestamp": "2025-09-30 22:19:45.212857", "step": 4693, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:45.244925", "step": 4693, "epoch": 3 }, { "type": "loss", "content": 0.004392869770526886, "timestamp": "2025-09-30 22:19:45.249359", "step": 4694, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:45.283113", "step": 4694, "epoch": 3 }, { "type": "loss", "content": 0.001689436612650752, "timestamp": "2025-09-30 22:19:45.293505", "step": 4695, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:45.331621", "step": 4695, "epoch": 3 }, { "type": "loss", "content": 0.0020471159368753433, "timestamp": "2025-09-30 22:19:45.366301", "step": 4696, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:45.403012", "step": 4696, "epoch": 3 }, { "type": "loss", "content": 0.003873740555718541, "timestamp": "2025-09-30 22:19:45.416051", "step": 4697, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:45.456013", "step": 4697, "epoch": 3 }, { "type": "loss", "content": 0.0026147381868213415, "timestamp": "2025-09-30 22:19:45.463286", "step": 4698, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:45.509674", "step": 4698, "epoch": 3 }, { "type": "loss", "content": 0.0006632203003391623, "timestamp": "2025-09-30 22:19:45.517249", "step": 4699, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:45.557208", "step": 4699, "epoch": 3 }, { "type": "loss", "content": 0.004550919868052006, "timestamp": "2025-09-30 22:19:45.591516", "step": 4700, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:45.641723", "step": 4700, "epoch": 3 }, { "type": "loss", "content": 0.0008368680137209594, "timestamp": "2025-09-30 22:19:45.650357", "step": 4701, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:45.686989", "step": 4701, "epoch": 3 }, { "type": "loss", "content": 0.002739719580858946, "timestamp": "2025-09-30 22:19:45.698049", "step": 4702, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:45.737941", "step": 4702, "epoch": 3 }, { "type": "loss", "content": 0.000583208107855171, "timestamp": "2025-09-30 22:19:45.751669", "step": 4703, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:45.793151", "step": 4703, "epoch": 3 }, { "type": "loss", "content": 0.010152159258723259, "timestamp": "2025-09-30 22:19:45.825217", "step": 4704, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:45.873179", "step": 4704, "epoch": 3 }, { "type": "loss", "content": 0.0006749342428520322, "timestamp": "2025-09-30 22:19:45.877914", "step": 4705, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:45.924602", "step": 4705, "epoch": 3 }, { "type": "loss", "content": 0.0002722602221183479, "timestamp": "2025-09-30 22:19:45.932173", "step": 4706, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:45.996214", "step": 4706, "epoch": 3 }, { "type": "loss", "content": 0.002924448810517788, "timestamp": "2025-09-30 22:19:46.010194", "step": 4707, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 592 ], "flops": 17560600598464 }, "timestamp": "2025-09-30 22:19:46.068069", "step": 4707, "epoch": 3 }, { "type": "loss", "content": 0.002039122162386775, "timestamp": "2025-09-30 22:19:46.110020", "step": 4708, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:46.146288", "step": 4708, "epoch": 3 }, { "type": "loss", "content": 0.0011562302242964506, "timestamp": "2025-09-30 22:19:46.155943", "step": 4709, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:46.203784", "step": 4709, "epoch": 3 }, { "type": "loss", "content": 0.00036889605689793825, "timestamp": "2025-09-30 22:19:46.217473", "step": 4710, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:46.270105", "step": 4710, "epoch": 3 }, { "type": "loss", "content": 0.0008371215080842376, "timestamp": "2025-09-30 22:19:46.276936", "step": 4711, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:46.321107", "step": 4711, "epoch": 3 }, { "type": "loss", "content": 0.0016866663936525583, "timestamp": "2025-09-30 22:19:46.352194", "step": 4712, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:46.396317", "step": 4712, "epoch": 3 }, { "type": "loss", "content": 0.0002764615637715906, "timestamp": "2025-09-30 22:19:46.401667", "step": 4713, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:46.436917", "step": 4713, "epoch": 3 }, { "type": "loss", "content": 0.00044769837404601276, "timestamp": "2025-09-30 22:19:46.446931", "step": 4714, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:46.495594", "step": 4714, "epoch": 3 }, { "type": "loss", "content": 0.003973923623561859, "timestamp": "2025-09-30 22:19:46.502644", "step": 4715, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:19:49.159786", "step": 4715, "epoch": 3 }, { "type": "pplx", "content": 5.916283949698796, "timestamp": "2025-09-30 22:19:49.165907", "step": 4715, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:49.198024", "step": 4715, "epoch": 3 }, { "type": "loss", "content": 0.008571183308959007, "timestamp": "2025-09-30 22:19:49.231133", "step": 4716, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:49.280396", "step": 4716, "epoch": 3 }, { "type": "loss", "content": 0.0006484166369773448, "timestamp": "2025-09-30 22:19:49.288754", "step": 4717, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:49.335668", "step": 4717, "epoch": 3 }, { "type": "loss", "content": 0.0005155609687790275, "timestamp": "2025-09-30 22:19:49.348306", "step": 4718, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:49.392772", "step": 4718, "epoch": 3 }, { "type": "loss", "content": 0.003010595915839076, "timestamp": "2025-09-30 22:19:49.400358", "step": 4719, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:49.440611", "step": 4719, "epoch": 3 }, { "type": "loss", "content": 0.002692155074328184, "timestamp": "2025-09-30 22:19:49.471788", "step": 4720, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-30 22:19:49.504329", "step": 4720, "epoch": 3 }, { "type": "loss", "content": 0.001199776423163712, "timestamp": "2025-09-30 22:19:49.506514", "step": 4721, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:49.541885", "step": 4721, "epoch": 3 }, { "type": "loss", "content": 0.0006059607258066535, "timestamp": "2025-09-30 22:19:49.549424", "step": 4722, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:49.591228", "step": 4722, "epoch": 3 }, { "type": "loss", "content": 0.0011583056766539812, "timestamp": "2025-09-30 22:19:49.604659", "step": 4723, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:49.643155", "step": 4723, "epoch": 3 }, { "type": "loss", "content": 0.0011154541280120611, "timestamp": "2025-09-30 22:19:49.671796", "step": 4724, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:49.706889", "step": 4724, "epoch": 3 }, { "type": "loss", "content": 0.001738877734169364, "timestamp": "2025-09-30 22:19:49.714893", "step": 4725, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:49.748139", "step": 4725, "epoch": 3 }, { "type": "loss", "content": 0.0023189731873571873, "timestamp": "2025-09-30 22:19:49.756106", "step": 4726, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:49.792938", "step": 4726, "epoch": 3 }, { "type": "loss", "content": 0.003144277259707451, "timestamp": "2025-09-30 22:19:49.805038", "step": 4727, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:49.849840", "step": 4727, "epoch": 3 }, { "type": "loss", "content": 0.009790902957320213, "timestamp": "2025-09-30 22:19:49.884156", "step": 4728, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:49.924978", "step": 4728, "epoch": 3 }, { "type": "loss", "content": 0.000623914529569447, "timestamp": "2025-09-30 22:19:49.930454", "step": 4729, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:49.963705", "step": 4729, "epoch": 3 }, { "type": "loss", "content": 0.010114804841578007, "timestamp": "2025-09-30 22:19:49.971723", "step": 4730, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:50.010049", "step": 4730, "epoch": 3 }, { "type": "loss", "content": 0.0009162042988464236, "timestamp": "2025-09-30 22:19:50.018097", "step": 4731, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:50.056047", "step": 4731, "epoch": 3 }, { "type": "loss", "content": 0.00019842210167553276, "timestamp": "2025-09-30 22:19:50.089084", "step": 4732, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:50.137549", "step": 4732, "epoch": 3 }, { "type": "loss", "content": 0.002871887059882283, "timestamp": "2025-09-30 22:19:50.152815", "step": 4733, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:50.189714", "step": 4733, "epoch": 3 }, { "type": "loss", "content": 0.003444153117015958, "timestamp": "2025-09-30 22:19:50.203455", "step": 4734, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:50.241485", "step": 4734, "epoch": 3 }, { "type": "loss", "content": 0.004989412147551775, "timestamp": "2025-09-30 22:19:50.248331", "step": 4735, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:19:50.302747", "step": 4735, "epoch": 3 }, { "type": "loss", "content": 0.006008580792695284, "timestamp": "2025-09-30 22:19:50.339932", "step": 4736, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:50.383337", "step": 4736, "epoch": 3 }, { "type": "loss", "content": 0.0024037344846874475, "timestamp": "2025-09-30 22:19:50.395881", "step": 4737, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:50.435039", "step": 4737, "epoch": 3 }, { "type": "loss", "content": 0.0007503728847950697, "timestamp": "2025-09-30 22:19:50.448716", "step": 4738, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:50.485018", "step": 4738, "epoch": 3 }, { "type": "loss", "content": 0.0027330864686518908, "timestamp": "2025-09-30 22:19:50.492885", "step": 4739, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:50.530740", "step": 4739, "epoch": 3 }, { "type": "loss", "content": 0.005178096238523722, "timestamp": "2025-09-30 22:19:50.565531", "step": 4740, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:50.624153", "step": 4740, "epoch": 3 }, { "type": "loss", "content": 0.0003495750133879483, "timestamp": "2025-09-30 22:19:50.634871", "step": 4741, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:50.679545", "step": 4741, "epoch": 3 }, { "type": "loss", "content": 0.0003605498350225389, "timestamp": "2025-09-30 22:19:50.687500", "step": 4742, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:50.729502", "step": 4742, "epoch": 3 }, { "type": "loss", "content": 0.002601269632577896, "timestamp": "2025-09-30 22:19:50.740075", "step": 4743, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:50.787436", "step": 4743, "epoch": 3 }, { "type": "loss", "content": 0.0007903297082521021, "timestamp": "2025-09-30 22:19:50.820818", "step": 4744, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:19:50.864280", "step": 4744, "epoch": 3 }, { "type": "loss", "content": 0.000361712125595659, "timestamp": "2025-09-30 22:19:50.880085", "step": 4745, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:50.919572", "step": 4745, "epoch": 3 }, { "type": "loss", "content": 0.01337670162320137, "timestamp": "2025-09-30 22:19:50.929988", "step": 4746, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:50.962672", "step": 4746, "epoch": 3 }, { "type": "loss", "content": 0.0022669811733067036, "timestamp": "2025-09-30 22:19:50.970607", "step": 4747, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:51.013616", "step": 4747, "epoch": 3 }, { "type": "loss", "content": 0.0003634454042185098, "timestamp": "2025-09-30 22:19:51.042074", "step": 4748, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:51.077298", "step": 4748, "epoch": 3 }, { "type": "loss", "content": 0.004122935235500336, "timestamp": "2025-09-30 22:19:51.090513", "step": 4749, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:51.129413", "step": 4749, "epoch": 3 }, { "type": "loss", "content": 0.0012344943825155497, "timestamp": "2025-09-30 22:19:51.136228", "step": 4750, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:51.170077", "step": 4750, "epoch": 3 }, { "type": "loss", "content": 0.0031255418434739113, "timestamp": "2025-09-30 22:19:51.174527", "step": 4751, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:51.208859", "step": 4751, "epoch": 3 }, { "type": "loss", "content": 0.0018705984111875296, "timestamp": "2025-09-30 22:19:51.240840", "step": 4752, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:51.277634", "step": 4752, "epoch": 3 }, { "type": "loss", "content": 0.00835074856877327, "timestamp": "2025-09-30 22:19:51.283274", "step": 4753, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:51.321212", "step": 4753, "epoch": 3 }, { "type": "loss", "content": 0.0031465431675314903, "timestamp": "2025-09-30 22:19:51.331623", "step": 4754, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:51.384832", "step": 4754, "epoch": 3 }, { "type": "loss", "content": 0.0033585333731025457, "timestamp": "2025-09-30 22:19:51.389860", "step": 4755, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:51.429842", "step": 4755, "epoch": 3 }, { "type": "loss", "content": 0.00037267437437549233, "timestamp": "2025-09-30 22:19:51.464158", "step": 4756, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:51.509366", "step": 4756, "epoch": 3 }, { "type": "loss", "content": 0.002945550484582782, "timestamp": "2025-09-30 22:19:51.517312", "step": 4757, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:51.553013", "step": 4757, "epoch": 3 }, { "type": "loss", "content": 0.003742699744179845, "timestamp": "2025-09-30 22:19:51.560746", "step": 4758, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:51.594390", "step": 4758, "epoch": 3 }, { "type": "loss", "content": 0.0038363654166460037, "timestamp": "2025-09-30 22:19:51.598863", "step": 4759, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:51.642669", "step": 4759, "epoch": 3 }, { "type": "loss", "content": 0.0011934564681723714, "timestamp": "2025-09-30 22:19:51.673819", "step": 4760, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:51.708526", "step": 4760, "epoch": 3 }, { "type": "loss", "content": 0.0024356788489967585, "timestamp": "2025-09-30 22:19:51.711917", "step": 4761, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:51.751471", "step": 4761, "epoch": 3 }, { "type": "loss", "content": 0.00025372591335326433, "timestamp": "2025-09-30 22:19:51.758390", "step": 4762, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:19:51.791196", "step": 4762, "epoch": 3 }, { "type": "loss", "content": 0.0007249244372360408, "timestamp": "2025-09-30 22:19:51.795468", "step": 4763, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:51.839825", "step": 4763, "epoch": 3 }, { "type": "loss", "content": 0.0019645406864583492, "timestamp": "2025-09-30 22:19:51.873195", "step": 4764, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:19:51.920353", "step": 4764, "epoch": 3 }, { "type": "loss", "content": 0.0048051439225673676, "timestamp": "2025-09-30 22:19:51.927563", "step": 4765, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:51.962335", "step": 4765, "epoch": 3 }, { "type": "loss", "content": 0.0010794244008138776, "timestamp": "2025-09-30 22:19:51.966738", "step": 4766, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:19:51.999503", "step": 4766, "epoch": 3 }, { "type": "loss", "content": 0.00022505532251670957, "timestamp": "2025-09-30 22:19:52.003882", "step": 4767, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:52.038392", "step": 4767, "epoch": 3 }, { "type": "loss", "content": 0.0019170681480318308, "timestamp": "2025-09-30 22:19:52.067010", "step": 4768, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:52.106339", "step": 4768, "epoch": 3 }, { "type": "loss", "content": 0.0008401435916312039, "timestamp": "2025-09-30 22:19:52.111424", "step": 4769, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:52.150870", "step": 4769, "epoch": 3 }, { "type": "loss", "content": 0.0033256863243877888, "timestamp": "2025-09-30 22:19:52.160827", "step": 4770, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:52.197730", "step": 4770, "epoch": 3 }, { "type": "loss", "content": 0.004256395623087883, "timestamp": "2025-09-30 22:19:52.205367", "step": 4771, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:52.239837", "step": 4771, "epoch": 3 }, { "type": "loss", "content": 0.001991269877180457, "timestamp": "2025-09-30 22:19:52.267819", "step": 4772, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:52.301773", "step": 4772, "epoch": 3 }, { "type": "loss", "content": 0.0005228807567618787, "timestamp": "2025-09-30 22:19:52.306328", "step": 4773, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:52.346044", "step": 4773, "epoch": 3 }, { "type": "loss", "content": 0.0021591621916741133, "timestamp": "2025-09-30 22:19:52.356981", "step": 4774, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:52.390548", "step": 4774, "epoch": 3 }, { "type": "loss", "content": 0.002881488995626569, "timestamp": "2025-09-30 22:19:52.402822", "step": 4775, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:52.439950", "step": 4775, "epoch": 3 }, { "type": "loss", "content": 0.004071081057190895, "timestamp": "2025-09-30 22:19:52.474829", "step": 4776, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:52.514831", "step": 4776, "epoch": 3 }, { "type": "loss", "content": 0.0008434464689344168, "timestamp": "2025-09-30 22:19:52.525133", "step": 4777, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:52.558323", "step": 4777, "epoch": 3 }, { "type": "loss", "content": 0.0006649593124166131, "timestamp": "2025-09-30 22:19:52.569433", "step": 4778, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:52.602032", "step": 4778, "epoch": 3 }, { "type": "loss", "content": 0.0019085907842963934, "timestamp": "2025-09-30 22:19:52.612269", "step": 4779, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:52.645900", "step": 4779, "epoch": 3 }, { "type": "loss", "content": 0.0013334425166249275, "timestamp": "2025-09-30 22:19:52.674176", "step": 4780, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:52.713040", "step": 4780, "epoch": 3 }, { "type": "loss", "content": 0.004540843889117241, "timestamp": "2025-09-30 22:19:52.728256", "step": 4781, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:52.767615", "step": 4781, "epoch": 3 }, { "type": "loss", "content": 0.002883870154619217, "timestamp": "2025-09-30 22:19:52.781370", "step": 4782, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:52.817438", "step": 4782, "epoch": 3 }, { "type": "loss", "content": 0.004271261394023895, "timestamp": "2025-09-30 22:19:52.827972", "step": 4783, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:52.862346", "step": 4783, "epoch": 3 }, { "type": "loss", "content": 0.002068759873509407, "timestamp": "2025-09-30 22:19:52.891048", "step": 4784, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:52.926739", "step": 4784, "epoch": 3 }, { "type": "loss", "content": 0.002156310947611928, "timestamp": "2025-09-30 22:19:52.931933", "step": 4785, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:52.969114", "step": 4785, "epoch": 3 }, { "type": "loss", "content": 0.0015850166091695428, "timestamp": "2025-09-30 22:19:52.976453", "step": 4786, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:53.019116", "step": 4786, "epoch": 3 }, { "type": "loss", "content": 0.0010362897301092744, "timestamp": "2025-09-30 22:19:53.033165", "step": 4787, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:53.073942", "step": 4787, "epoch": 3 }, { "type": "loss", "content": 0.0005819321959279478, "timestamp": "2025-09-30 22:19:53.102558", "step": 4788, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:19:53.145141", "step": 4788, "epoch": 3 }, { "type": "loss", "content": 0.005462463945150375, "timestamp": "2025-09-30 22:19:53.162067", "step": 4789, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:53.197579", "step": 4789, "epoch": 3 }, { "type": "loss", "content": 0.0010457502212375402, "timestamp": "2025-09-30 22:19:53.205505", "step": 4790, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:53.241714", "step": 4790, "epoch": 3 }, { "type": "loss", "content": 0.0010831314139068127, "timestamp": "2025-09-30 22:19:53.248644", "step": 4791, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:53.285422", "step": 4791, "epoch": 3 }, { "type": "loss", "content": 0.005183074623346329, "timestamp": "2025-09-30 22:19:53.313812", "step": 4792, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:53.348349", "step": 4792, "epoch": 3 }, { "type": "loss", "content": 0.0035677261184901, "timestamp": "2025-09-30 22:19:53.353841", "step": 4793, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:53.386978", "step": 4793, "epoch": 3 }, { "type": "loss", "content": 0.0006080110324546695, "timestamp": "2025-09-30 22:19:53.394277", "step": 4794, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:53.429953", "step": 4794, "epoch": 3 }, { "type": "loss", "content": 0.002011936390772462, "timestamp": "2025-09-30 22:19:53.442165", "step": 4795, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:53.475436", "step": 4795, "epoch": 3 }, { "type": "loss", "content": 0.001812252216041088, "timestamp": "2025-09-30 22:19:53.503743", "step": 4796, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:53.569862", "step": 4796, "epoch": 3 }, { "type": "loss", "content": 0.0021046362817287445, "timestamp": "2025-09-30 22:19:53.574914", "step": 4797, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:53.608040", "step": 4797, "epoch": 3 }, { "type": "loss", "content": 0.0029523340053856373, "timestamp": "2025-09-30 22:19:53.620683", "step": 4798, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:53.653481", "step": 4798, "epoch": 3 }, { "type": "loss", "content": 0.0034778157714754343, "timestamp": "2025-09-30 22:19:53.662876", "step": 4799, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:53.701581", "step": 4799, "epoch": 3 }, { "type": "loss", "content": 0.0006414828239940107, "timestamp": "2025-09-30 22:19:53.729273", "step": 4800, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:53.778220", "step": 4800, "epoch": 3 }, { "type": "loss", "content": 0.0027236840687692165, "timestamp": "2025-09-30 22:19:53.791260", "step": 4801, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:53.825221", "step": 4801, "epoch": 3 }, { "type": "loss", "content": 0.001283834339119494, "timestamp": "2025-09-30 22:19:53.832145", "step": 4802, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:53.874548", "step": 4802, "epoch": 3 }, { "type": "loss", "content": 0.0005156965926289558, "timestamp": "2025-09-30 22:19:53.884804", "step": 4803, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:53.927121", "step": 4803, "epoch": 3 }, { "type": "loss", "content": 0.0042557427659630775, "timestamp": "2025-09-30 22:19:53.955294", "step": 4804, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:19:53.994287", "step": 4804, "epoch": 3 }, { "type": "loss", "content": 0.0015035402029752731, "timestamp": "2025-09-30 22:19:54.010280", "step": 4805, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:54.047057", "step": 4805, "epoch": 3 }, { "type": "loss", "content": 0.002708726329728961, "timestamp": "2025-09-30 22:19:54.054508", "step": 4806, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:54.098052", "step": 4806, "epoch": 3 }, { "type": "loss", "content": 0.001241795253008604, "timestamp": "2025-09-30 22:19:54.110691", "step": 4807, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:19:54.154326", "step": 4807, "epoch": 3 }, { "type": "loss", "content": 0.00076678377809003, "timestamp": "2025-09-30 22:19:54.185596", "step": 4808, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:54.227596", "step": 4808, "epoch": 3 }, { "type": "loss", "content": 0.0068432642146945, "timestamp": "2025-09-30 22:19:54.240998", "step": 4809, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:54.285095", "step": 4809, "epoch": 3 }, { "type": "loss", "content": 0.0026549880858510733, "timestamp": "2025-09-30 22:19:54.299135", "step": 4810, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:54.333302", "step": 4810, "epoch": 3 }, { "type": "loss", "content": 0.0008714778232388198, "timestamp": "2025-09-30 22:19:54.345554", "step": 4811, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:54.379315", "step": 4811, "epoch": 3 }, { "type": "loss", "content": 0.003857155330479145, "timestamp": "2025-09-30 22:19:54.407641", "step": 4812, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:19:54.443622", "step": 4812, "epoch": 3 }, { "type": "loss", "content": 0.010509241372346878, "timestamp": "2025-09-30 22:19:54.456980", "step": 4813, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:54.495788", "step": 4813, "epoch": 3 }, { "type": "loss", "content": 0.013247305527329445, "timestamp": "2025-09-30 22:19:54.508415", "step": 4814, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:54.555100", "step": 4814, "epoch": 3 }, { "type": "loss", "content": 0.008652194403111935, "timestamp": "2025-09-30 22:19:54.570734", "step": 4815, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:54.608167", "step": 4815, "epoch": 3 }, { "type": "loss", "content": 0.006328233517706394, "timestamp": "2025-09-30 22:19:54.638271", "step": 4816, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:54.672896", "step": 4816, "epoch": 3 }, { "type": "loss", "content": 0.006439377553761005, "timestamp": "2025-09-30 22:19:54.685917", "step": 4817, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:54.724449", "step": 4817, "epoch": 3 }, { "type": "loss", "content": 0.007747083902359009, "timestamp": "2025-09-30 22:19:54.738261", "step": 4818, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:54.777415", "step": 4818, "epoch": 3 }, { "type": "loss", "content": 0.00303424964658916, "timestamp": "2025-09-30 22:19:54.787128", "step": 4819, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:54.827660", "step": 4819, "epoch": 3 }, { "type": "loss", "content": 0.0028589745052158833, "timestamp": "2025-09-30 22:19:54.861879", "step": 4820, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:54.898352", "step": 4820, "epoch": 3 }, { "type": "loss", "content": 0.002803644398227334, "timestamp": "2025-09-30 22:19:54.908489", "step": 4821, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:54.944604", "step": 4821, "epoch": 3 }, { "type": "loss", "content": 0.004195516929030418, "timestamp": "2025-09-30 22:19:54.956569", "step": 4822, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:54.993327", "step": 4822, "epoch": 3 }, { "type": "loss", "content": 0.005452101118862629, "timestamp": "2025-09-30 22:19:55.004232", "step": 4823, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:19:55.041894", "step": 4823, "epoch": 3 }, { "type": "loss", "content": 0.003121099667623639, "timestamp": "2025-09-30 22:19:55.072230", "step": 4824, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:55.115066", "step": 4824, "epoch": 3 }, { "type": "loss", "content": 0.00045138385030440986, "timestamp": "2025-09-30 22:19:55.119470", "step": 4825, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:55.162302", "step": 4825, "epoch": 3 }, { "type": "loss", "content": 0.004608877934515476, "timestamp": "2025-09-30 22:19:55.174368", "step": 4826, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:55.207428", "step": 4826, "epoch": 3 }, { "type": "loss", "content": 0.01124146394431591, "timestamp": "2025-09-30 22:19:55.218269", "step": 4827, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:55.258376", "step": 4827, "epoch": 3 }, { "type": "loss", "content": 0.0018338344525545835, "timestamp": "2025-09-30 22:19:55.289988", "step": 4828, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:55.327265", "step": 4828, "epoch": 3 }, { "type": "loss", "content": 0.0024090490769594908, "timestamp": "2025-09-30 22:19:55.340246", "step": 4829, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:55.382022", "step": 4829, "epoch": 3 }, { "type": "loss", "content": 0.007087676785886288, "timestamp": "2025-09-30 22:19:55.389352", "step": 4830, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:19:57.831717", "step": 4830, "epoch": 3 }, { "type": "pplx", "content": 6.00403182745848, "timestamp": "2025-09-30 22:19:57.835117", "step": 4830, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:57.867527", "step": 4830, "epoch": 3 }, { "type": "loss", "content": 0.0020713661797344685, "timestamp": "2025-09-30 22:19:57.877508", "step": 4831, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:57.911376", "step": 4831, "epoch": 3 }, { "type": "loss", "content": 0.003641373012214899, "timestamp": "2025-09-30 22:19:57.942477", "step": 4832, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:19:57.983911", "step": 4832, "epoch": 3 }, { "type": "loss", "content": 0.00029382065986283123, "timestamp": "2025-09-30 22:19:57.992717", "step": 4833, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:58.032411", "step": 4833, "epoch": 3 }, { "type": "loss", "content": 0.0003123684728052467, "timestamp": "2025-09-30 22:19:58.044397", "step": 4834, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:19:58.080813", "step": 4834, "epoch": 3 }, { "type": "loss", "content": 0.0014360976638272405, "timestamp": "2025-09-30 22:19:58.087632", "step": 4835, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:58.130984", "step": 4835, "epoch": 3 }, { "type": "loss", "content": 0.0006729178712703288, "timestamp": "2025-09-30 22:19:58.165329", "step": 4836, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:58.198399", "step": 4836, "epoch": 3 }, { "type": "loss", "content": 0.0005326576065272093, "timestamp": "2025-09-30 22:19:58.212066", "step": 4837, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:19:58.256949", "step": 4837, "epoch": 3 }, { "type": "loss", "content": 0.0012045191833749413, "timestamp": "2025-09-30 22:19:58.270290", "step": 4838, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:58.305562", "step": 4838, "epoch": 3 }, { "type": "loss", "content": 0.003865152597427368, "timestamp": "2025-09-30 22:19:58.316500", "step": 4839, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:58.361056", "step": 4839, "epoch": 3 }, { "type": "loss", "content": 0.010859113186597824, "timestamp": "2025-09-30 22:19:58.392643", "step": 4840, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:19:58.438372", "step": 4840, "epoch": 3 }, { "type": "loss", "content": 0.0014040463138371706, "timestamp": "2025-09-30 22:19:58.456456", "step": 4841, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:58.522837", "step": 4841, "epoch": 3 }, { "type": "loss", "content": 0.0022020265460014343, "timestamp": "2025-09-30 22:19:58.542824", "step": 4842, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:58.596703", "step": 4842, "epoch": 3 }, { "type": "loss", "content": 0.002500901697203517, "timestamp": "2025-09-30 22:19:58.614464", "step": 4843, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:58.662835", "step": 4843, "epoch": 3 }, { "type": "loss", "content": 0.009635166265070438, "timestamp": "2025-09-30 22:19:58.702492", "step": 4844, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:58.742975", "step": 4844, "epoch": 3 }, { "type": "loss", "content": 0.000577495142351836, "timestamp": "2025-09-30 22:19:58.756188", "step": 4845, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:58.803952", "step": 4845, "epoch": 3 }, { "type": "loss", "content": 0.003777291625738144, "timestamp": "2025-09-30 22:19:58.816538", "step": 4846, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:58.861487", "step": 4846, "epoch": 3 }, { "type": "loss", "content": 0.0007583480328321457, "timestamp": "2025-09-30 22:19:58.873666", "step": 4847, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:58.921695", "step": 4847, "epoch": 3 }, { "type": "loss", "content": 0.002258284017443657, "timestamp": "2025-09-30 22:19:58.956370", "step": 4848, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:19:59.001847", "step": 4848, "epoch": 3 }, { "type": "loss", "content": 0.004016295075416565, "timestamp": "2025-09-30 22:19:59.017499", "step": 4849, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:59.065748", "step": 4849, "epoch": 3 }, { "type": "loss", "content": 0.005633024498820305, "timestamp": "2025-09-30 22:19:59.081262", "step": 4850, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:59.123684", "step": 4850, "epoch": 3 }, { "type": "loss", "content": 0.0025410479865968227, "timestamp": "2025-09-30 22:19:59.136214", "step": 4851, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:59.188446", "step": 4851, "epoch": 3 }, { "type": "loss", "content": 0.0015249482821673155, "timestamp": "2025-09-30 22:19:59.223137", "step": 4852, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:59.267398", "step": 4852, "epoch": 3 }, { "type": "loss", "content": 0.006067855749279261, "timestamp": "2025-09-30 22:19:59.282725", "step": 4853, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:59.317344", "step": 4853, "epoch": 3 }, { "type": "loss", "content": 0.002697502262890339, "timestamp": "2025-09-30 22:19:59.340439", "step": 4854, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:59.393458", "step": 4854, "epoch": 3 }, { "type": "loss", "content": 0.0076017738319933414, "timestamp": "2025-09-30 22:19:59.418794", "step": 4855, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:59.460281", "step": 4855, "epoch": 3 }, { "type": "loss", "content": 0.0035977442748844624, "timestamp": "2025-09-30 22:19:59.500647", "step": 4856, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:19:59.540118", "step": 4856, "epoch": 3 }, { "type": "loss", "content": 0.003823881270363927, "timestamp": "2025-09-30 22:19:59.550554", "step": 4857, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:19:59.598185", "step": 4857, "epoch": 3 }, { "type": "loss", "content": 0.008830385282635689, "timestamp": "2025-09-30 22:19:59.612014", "step": 4858, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:19:59.658641", "step": 4858, "epoch": 3 }, { "type": "loss", "content": 0.010811922140419483, "timestamp": "2025-09-30 22:19:59.674249", "step": 4859, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:19:59.718442", "step": 4859, "epoch": 3 }, { "type": "loss", "content": 0.003709632670506835, "timestamp": "2025-09-30 22:19:59.753149", "step": 4860, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:19:59.796300", "step": 4860, "epoch": 3 }, { "type": "loss", "content": 0.0012815343216061592, "timestamp": "2025-09-30 22:19:59.813689", "step": 4861, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:19:59.857633", "step": 4861, "epoch": 3 }, { "type": "loss", "content": 0.011897546239197254, "timestamp": "2025-09-30 22:19:59.870973", "step": 4862, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:19:59.913239", "step": 4862, "epoch": 3 }, { "type": "loss", "content": 0.004625137895345688, "timestamp": "2025-09-30 22:19:59.924051", "step": 4863, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:19:59.972091", "step": 4863, "epoch": 3 }, { "type": "loss", "content": 0.004630404058843851, "timestamp": "2025-09-30 22:20:00.005276", "step": 4864, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:00.059337", "step": 4864, "epoch": 3 }, { "type": "loss", "content": 0.0031580578070133924, "timestamp": "2025-09-30 22:20:00.071931", "step": 4865, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:00.112764", "step": 4865, "epoch": 3 }, { "type": "loss", "content": 0.005686325021088123, "timestamp": "2025-09-30 22:20:00.126599", "step": 4866, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:00.161908", "step": 4866, "epoch": 3 }, { "type": "loss", "content": 0.0021785860881209373, "timestamp": "2025-09-30 22:20:00.175636", "step": 4867, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:00.217110", "step": 4867, "epoch": 3 }, { "type": "loss", "content": 0.0067062643356621265, "timestamp": "2025-09-30 22:20:00.251278", "step": 4868, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:00.291486", "step": 4868, "epoch": 3 }, { "type": "loss", "content": 0.0007970409351401031, "timestamp": "2025-09-30 22:20:00.301439", "step": 4869, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:00.335479", "step": 4869, "epoch": 3 }, { "type": "loss", "content": 0.0006516333087347448, "timestamp": "2025-09-30 22:20:00.346272", "step": 4870, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:00.379224", "step": 4870, "epoch": 3 }, { "type": "loss", "content": 0.002726598409935832, "timestamp": "2025-09-30 22:20:00.389978", "step": 4871, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:00.422906", "step": 4871, "epoch": 3 }, { "type": "loss", "content": 0.002618856495246291, "timestamp": "2025-09-30 22:20:00.456113", "step": 4872, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:00.489336", "step": 4872, "epoch": 3 }, { "type": "loss", "content": 0.0014631540980190039, "timestamp": "2025-09-30 22:20:00.493963", "step": 4873, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:00.534430", "step": 4873, "epoch": 3 }, { "type": "loss", "content": 0.0011044855928048491, "timestamp": "2025-09-30 22:20:00.545435", "step": 4874, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:20:00.596335", "step": 4874, "epoch": 3 }, { "type": "loss", "content": 0.014512617141008377, "timestamp": "2025-09-30 22:20:00.613349", "step": 4875, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:00.653202", "step": 4875, "epoch": 3 }, { "type": "loss", "content": 0.0019889986142516136, "timestamp": "2025-09-30 22:20:00.686346", "step": 4876, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:00.729198", "step": 4876, "epoch": 3 }, { "type": "loss", "content": 0.0013323453022167087, "timestamp": "2025-09-30 22:20:00.741932", "step": 4877, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:20:00.788681", "step": 4877, "epoch": 3 }, { "type": "loss", "content": 0.0036682288628071547, "timestamp": "2025-09-30 22:20:00.804398", "step": 4878, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 544 ], "flops": 16136789420416 }, "timestamp": "2025-09-30 22:20:00.858563", "step": 4878, "epoch": 3 }, { "type": "loss", "content": 0.004160485230386257, "timestamp": "2025-09-30 22:20:00.877643", "step": 4879, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:20:00.918275", "step": 4879, "epoch": 3 }, { "type": "loss", "content": 0.003462860593572259, "timestamp": "2025-09-30 22:20:00.953097", "step": 4880, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:00.996869", "step": 4880, "epoch": 3 }, { "type": "loss", "content": 0.004931773990392685, "timestamp": "2025-09-30 22:20:01.007667", "step": 4881, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:01.055795", "step": 4881, "epoch": 3 }, { "type": "loss", "content": 0.0018768792506307364, "timestamp": "2025-09-30 22:20:01.068120", "step": 4882, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:01.119732", "step": 4882, "epoch": 3 }, { "type": "loss", "content": 0.0018635703017935157, "timestamp": "2025-09-30 22:20:01.133437", "step": 4883, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:01.179444", "step": 4883, "epoch": 3 }, { "type": "loss", "content": 0.0034107728861272335, "timestamp": "2025-09-30 22:20:01.214207", "step": 4884, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:20:01.272375", "step": 4884, "epoch": 3 }, { "type": "loss", "content": 0.001060501323081553, "timestamp": "2025-09-30 22:20:01.287779", "step": 4885, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:01.334182", "step": 4885, "epoch": 3 }, { "type": "loss", "content": 0.0005893037305213511, "timestamp": "2025-09-30 22:20:01.347887", "step": 4886, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:01.380956", "step": 4886, "epoch": 3 }, { "type": "loss", "content": 0.0013945624232292175, "timestamp": "2025-09-30 22:20:01.391880", "step": 4887, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:01.439785", "step": 4887, "epoch": 3 }, { "type": "loss", "content": 0.0019568903371691704, "timestamp": "2025-09-30 22:20:01.473211", "step": 4888, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:20:01.513333", "step": 4888, "epoch": 3 }, { "type": "loss", "content": 0.005334537476301193, "timestamp": "2025-09-30 22:20:01.529157", "step": 4889, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:01.562951", "step": 4889, "epoch": 3 }, { "type": "loss", "content": 0.004985298495739698, "timestamp": "2025-09-30 22:20:01.574186", "step": 4890, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:01.613354", "step": 4890, "epoch": 3 }, { "type": "loss", "content": 0.004774213302880526, "timestamp": "2025-09-30 22:20:01.626748", "step": 4891, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:20:01.674578", "step": 4891, "epoch": 3 }, { "type": "loss", "content": 0.001032113330438733, "timestamp": "2025-09-30 22:20:01.711652", "step": 4892, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:01.750767", "step": 4892, "epoch": 3 }, { "type": "loss", "content": 0.0032533309422433376, "timestamp": "2025-09-30 22:20:01.763906", "step": 4893, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:01.805856", "step": 4893, "epoch": 3 }, { "type": "loss", "content": 0.0033527498599141836, "timestamp": "2025-09-30 22:20:01.818433", "step": 4894, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:20:01.863286", "step": 4894, "epoch": 3 }, { "type": "loss", "content": 0.0034811438526958227, "timestamp": "2025-09-30 22:20:01.880580", "step": 4895, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:20:01.922598", "step": 4895, "epoch": 3 }, { "type": "loss", "content": 0.0022857696749269962, "timestamp": "2025-09-30 22:20:01.957522", "step": 4896, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:01.992399", "step": 4896, "epoch": 3 }, { "type": "loss", "content": 0.0008828876889310777, "timestamp": "2025-09-30 22:20:01.997708", "step": 4897, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:02.038404", "step": 4897, "epoch": 3 }, { "type": "loss", "content": 0.006894740276038647, "timestamp": "2025-09-30 22:20:02.047934", "step": 4898, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:02.082137", "step": 4898, "epoch": 3 }, { "type": "loss", "content": 0.0022010121028870344, "timestamp": "2025-09-30 22:20:02.093282", "step": 4899, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:02.126092", "step": 4899, "epoch": 3 }, { "type": "loss", "content": 0.003999904729425907, "timestamp": "2025-09-30 22:20:02.154926", "step": 4900, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:02.196354", "step": 4900, "epoch": 3 }, { "type": "loss", "content": 0.002763181459158659, "timestamp": "2025-09-30 22:20:02.201974", "step": 4901, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:02.233901", "step": 4901, "epoch": 3 }, { "type": "loss", "content": 0.0005524757434614003, "timestamp": "2025-09-30 22:20:02.242211", "step": 4902, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:02.280643", "step": 4902, "epoch": 3 }, { "type": "loss", "content": 0.0009317506337538362, "timestamp": "2025-09-30 22:20:02.288603", "step": 4903, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:02.325818", "step": 4903, "epoch": 3 }, { "type": "loss", "content": 0.0023062448017299175, "timestamp": "2025-09-30 22:20:02.357054", "step": 4904, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:02.396426", "step": 4904, "epoch": 3 }, { "type": "loss", "content": 0.0009080026648007333, "timestamp": "2025-09-30 22:20:02.404353", "step": 4905, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:02.447633", "step": 4905, "epoch": 3 }, { "type": "loss", "content": 0.0010759907308965921, "timestamp": "2025-09-30 22:20:02.458052", "step": 4906, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:02.497621", "step": 4906, "epoch": 3 }, { "type": "loss", "content": 0.002068843925371766, "timestamp": "2025-09-30 22:20:02.510208", "step": 4907, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:02.557110", "step": 4907, "epoch": 3 }, { "type": "loss", "content": 0.0005585406324826181, "timestamp": "2025-09-30 22:20:02.589242", "step": 4908, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:02.624360", "step": 4908, "epoch": 3 }, { "type": "loss", "content": 0.0036755427718162537, "timestamp": "2025-09-30 22:20:02.636168", "step": 4909, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:02.676745", "step": 4909, "epoch": 3 }, { "type": "loss", "content": 0.0012291098246350884, "timestamp": "2025-09-30 22:20:02.689557", "step": 4910, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:02.753063", "step": 4910, "epoch": 3 }, { "type": "loss", "content": 0.0008901917026378214, "timestamp": "2025-09-30 22:20:02.760310", "step": 4911, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:02.801478", "step": 4911, "epoch": 3 }, { "type": "loss", "content": 0.0024204845540225506, "timestamp": "2025-09-30 22:20:02.829224", "step": 4912, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:20:02.866527", "step": 4912, "epoch": 3 }, { "type": "loss", "content": 0.002846227027475834, "timestamp": "2025-09-30 22:20:02.876580", "step": 4913, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:02.916586", "step": 4913, "epoch": 3 }, { "type": "loss", "content": 0.002848659874871373, "timestamp": "2025-09-30 22:20:02.925383", "step": 4914, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:02.968181", "step": 4914, "epoch": 3 }, { "type": "loss", "content": 0.000996431801468134, "timestamp": "2025-09-30 22:20:02.980357", "step": 4915, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:03.017893", "step": 4915, "epoch": 3 }, { "type": "loss", "content": 0.0005112317157909274, "timestamp": "2025-09-30 22:20:03.047229", "step": 4916, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:03.088400", "step": 4916, "epoch": 3 }, { "type": "loss", "content": 0.000986065249890089, "timestamp": "2025-09-30 22:20:03.100841", "step": 4917, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:03.134455", "step": 4917, "epoch": 3 }, { "type": "loss", "content": 0.002913024974986911, "timestamp": "2025-09-30 22:20:03.146750", "step": 4918, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:03.188641", "step": 4918, "epoch": 3 }, { "type": "loss", "content": 0.0006965301581658423, "timestamp": "2025-09-30 22:20:03.202012", "step": 4919, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:03.236631", "step": 4919, "epoch": 3 }, { "type": "loss", "content": 0.002137935720384121, "timestamp": "2025-09-30 22:20:03.266008", "step": 4920, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:03.300349", "step": 4920, "epoch": 3 }, { "type": "loss", "content": 0.0022465272340923548, "timestamp": "2025-09-30 22:20:03.306086", "step": 4921, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:03.340703", "step": 4921, "epoch": 3 }, { "type": "loss", "content": 0.0003808860492426902, "timestamp": "2025-09-30 22:20:03.354710", "step": 4922, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:03.389992", "step": 4922, "epoch": 3 }, { "type": "loss", "content": 0.0016280546551570296, "timestamp": "2025-09-30 22:20:03.401028", "step": 4923, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:03.438833", "step": 4923, "epoch": 3 }, { "type": "loss", "content": 0.002310381270945072, "timestamp": "2025-09-30 22:20:03.470120", "step": 4924, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:03.506525", "step": 4924, "epoch": 3 }, { "type": "loss", "content": 0.008243824355304241, "timestamp": "2025-09-30 22:20:03.514154", "step": 4925, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:03.561807", "step": 4925, "epoch": 3 }, { "type": "loss", "content": 0.0007182768895290792, "timestamp": "2025-09-30 22:20:03.568986", "step": 4926, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:03.631333", "step": 4926, "epoch": 3 }, { "type": "loss", "content": 0.0027369027957320213, "timestamp": "2025-09-30 22:20:03.638561", "step": 4927, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:03.685843", "step": 4927, "epoch": 3 }, { "type": "loss", "content": 0.0006846529431641102, "timestamp": "2025-09-30 22:20:03.726993", "step": 4928, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:03.766064", "step": 4928, "epoch": 3 }, { "type": "loss", "content": 0.0016806876519694924, "timestamp": "2025-09-30 22:20:03.783157", "step": 4929, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:03.842748", "step": 4929, "epoch": 3 }, { "type": "loss", "content": 0.0005215826095081866, "timestamp": "2025-09-30 22:20:03.863782", "step": 4930, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:03.905420", "step": 4930, "epoch": 3 }, { "type": "loss", "content": 0.0003204425738658756, "timestamp": "2025-09-30 22:20:03.913001", "step": 4931, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:03.953071", "step": 4931, "epoch": 3 }, { "type": "loss", "content": 0.0004890135023742914, "timestamp": "2025-09-30 22:20:03.983205", "step": 4932, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:04.048277", "step": 4932, "epoch": 3 }, { "type": "loss", "content": 0.00029881924274377525, "timestamp": "2025-09-30 22:20:04.053114", "step": 4933, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:04.103877", "step": 4933, "epoch": 3 }, { "type": "loss", "content": 0.0007652129279449582, "timestamp": "2025-09-30 22:20:04.111772", "step": 4934, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:04.145907", "step": 4934, "epoch": 3 }, { "type": "loss", "content": 0.002809490542858839, "timestamp": "2025-09-30 22:20:04.156944", "step": 4935, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:04.192975", "step": 4935, "epoch": 3 }, { "type": "loss", "content": 0.001373854698613286, "timestamp": "2025-09-30 22:20:04.224343", "step": 4936, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:04.258773", "step": 4936, "epoch": 3 }, { "type": "loss", "content": 0.0024177818559110165, "timestamp": "2025-09-30 22:20:04.267563", "step": 4937, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:04.308463", "step": 4937, "epoch": 3 }, { "type": "loss", "content": 0.000511349004227668, "timestamp": "2025-09-30 22:20:04.325857", "step": 4938, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:04.365670", "step": 4938, "epoch": 3 }, { "type": "loss", "content": 0.013990801759064198, "timestamp": "2025-09-30 22:20:04.373393", "step": 4939, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:04.425924", "step": 4939, "epoch": 3 }, { "type": "loss", "content": 0.0008692088886164129, "timestamp": "2025-09-30 22:20:04.454526", "step": 4940, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:04.487584", "step": 4940, "epoch": 3 }, { "type": "loss", "content": 0.0023386774118989706, "timestamp": "2025-09-30 22:20:04.501836", "step": 4941, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:04.543872", "step": 4941, "epoch": 3 }, { "type": "loss", "content": 0.0004770174855366349, "timestamp": "2025-09-30 22:20:04.554180", "step": 4942, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:04.586014", "step": 4942, "epoch": 3 }, { "type": "loss", "content": 0.0015950370579957962, "timestamp": "2025-09-30 22:20:04.596529", "step": 4943, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:04.630914", "step": 4943, "epoch": 3 }, { "type": "loss", "content": 0.0012634805170819163, "timestamp": "2025-09-30 22:20:04.662667", "step": 4944, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:04.697185", "step": 4944, "epoch": 3 }, { "type": "loss", "content": 0.0018261810764670372, "timestamp": "2025-09-30 22:20:04.707109", "step": 4945, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:20:07.195307", "step": 4945, "epoch": 3 }, { "type": "pplx", "content": 6.016984404568712, "timestamp": "2025-09-30 22:20:07.199300", "step": 4945, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:07.231856", "step": 4945, "epoch": 3 }, { "type": "loss", "content": 0.0005609581130556762, "timestamp": "2025-09-30 22:20:07.241691", "step": 4946, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:07.290686", "step": 4946, "epoch": 3 }, { "type": "loss", "content": 0.0029263258911669254, "timestamp": "2025-09-30 22:20:07.304404", "step": 4947, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:07.348977", "step": 4947, "epoch": 3 }, { "type": "loss", "content": 0.0006811014609411359, "timestamp": "2025-09-30 22:20:07.380755", "step": 4948, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:07.413620", "step": 4948, "epoch": 3 }, { "type": "loss", "content": 0.0005025799619033933, "timestamp": "2025-09-30 22:20:07.422260", "step": 4949, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:07.459848", "step": 4949, "epoch": 3 }, { "type": "loss", "content": 0.004274230916053057, "timestamp": "2025-09-30 22:20:07.472374", "step": 4950, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:07.516625", "step": 4950, "epoch": 3 }, { "type": "loss", "content": 0.0005864065024070442, "timestamp": "2025-09-30 22:20:07.529218", "step": 4951, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:07.579379", "step": 4951, "epoch": 3 }, { "type": "loss", "content": 0.00278579187579453, "timestamp": "2025-09-30 22:20:07.611270", "step": 4952, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:07.645371", "step": 4952, "epoch": 3 }, { "type": "loss", "content": 0.0011230125091969967, "timestamp": "2025-09-30 22:20:07.653947", "step": 4953, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:07.686018", "step": 4953, "epoch": 3 }, { "type": "loss", "content": 0.00035609089536592364, "timestamp": "2025-09-30 22:20:07.697189", "step": 4954, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:07.730898", "step": 4954, "epoch": 3 }, { "type": "loss", "content": 0.0022988419514149427, "timestamp": "2025-09-30 22:20:07.743240", "step": 4955, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:07.778623", "step": 4955, "epoch": 3 }, { "type": "loss", "content": 0.0022796562407165766, "timestamp": "2025-09-30 22:20:07.807476", "step": 4956, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:07.840947", "step": 4956, "epoch": 3 }, { "type": "loss", "content": 0.002927593421190977, "timestamp": "2025-09-30 22:20:07.853662", "step": 4957, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:07.886443", "step": 4957, "epoch": 3 }, { "type": "loss", "content": 0.0007647660095244646, "timestamp": "2025-09-30 22:20:07.897514", "step": 4958, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:07.941372", "step": 4958, "epoch": 3 }, { "type": "loss", "content": 0.0022333392407745123, "timestamp": "2025-09-30 22:20:07.953674", "step": 4959, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:07.998548", "step": 4959, "epoch": 3 }, { "type": "loss", "content": 0.0015479567227885127, "timestamp": "2025-09-30 22:20:08.032839", "step": 4960, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:08.072423", "step": 4960, "epoch": 3 }, { "type": "loss", "content": 9.858738485490903e-05, "timestamp": "2025-09-30 22:20:08.080491", "step": 4961, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:08.124165", "step": 4961, "epoch": 3 }, { "type": "loss", "content": 0.0009962372714653611, "timestamp": "2025-09-30 22:20:08.136749", "step": 4962, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:08.173761", "step": 4962, "epoch": 3 }, { "type": "loss", "content": 0.0013241246342658997, "timestamp": "2025-09-30 22:20:08.186043", "step": 4963, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:08.221733", "step": 4963, "epoch": 3 }, { "type": "loss", "content": 0.00218255165964365, "timestamp": "2025-09-30 22:20:08.255168", "step": 4964, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:08.316475", "step": 4964, "epoch": 3 }, { "type": "loss", "content": 0.0009654370369389653, "timestamp": "2025-09-30 22:20:08.329147", "step": 4965, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:08.363268", "step": 4965, "epoch": 3 }, { "type": "loss", "content": 0.000785224256105721, "timestamp": "2025-09-30 22:20:08.374789", "step": 4966, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:08.418988", "step": 4966, "epoch": 3 }, { "type": "loss", "content": 0.0007269098423421383, "timestamp": "2025-09-30 22:20:08.433512", "step": 4967, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:08.471546", "step": 4967, "epoch": 3 }, { "type": "loss", "content": 0.0024295735638588667, "timestamp": "2025-09-30 22:20:08.504778", "step": 4968, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:08.545227", "step": 4968, "epoch": 3 }, { "type": "loss", "content": 0.0007269516936503351, "timestamp": "2025-09-30 22:20:08.553999", "step": 4969, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:08.592788", "step": 4969, "epoch": 3 }, { "type": "loss", "content": 0.001029142295010388, "timestamp": "2025-09-30 22:20:08.603925", "step": 4970, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:08.641414", "step": 4970, "epoch": 3 }, { "type": "loss", "content": 0.0019772483501583338, "timestamp": "2025-09-30 22:20:08.653986", "step": 4971, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:08.699420", "step": 4971, "epoch": 3 }, { "type": "loss", "content": 0.003952382132411003, "timestamp": "2025-09-30 22:20:08.731474", "step": 4972, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:08.764520", "step": 4972, "epoch": 3 }, { "type": "loss", "content": 0.0005930595798417926, "timestamp": "2025-09-30 22:20:08.775895", "step": 4973, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:08.810330", "step": 4973, "epoch": 3 }, { "type": "loss", "content": 0.0009736541542224586, "timestamp": "2025-09-30 22:20:08.820721", "step": 4974, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:08.860718", "step": 4974, "epoch": 3 }, { "type": "loss", "content": 0.011569741182029247, "timestamp": "2025-09-30 22:20:08.868208", "step": 4975, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:08.901806", "step": 4975, "epoch": 3 }, { "type": "loss", "content": 0.0005655374843627214, "timestamp": "2025-09-30 22:20:08.935015", "step": 4976, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:08.977844", "step": 4976, "epoch": 3 }, { "type": "loss", "content": 0.002230295678600669, "timestamp": "2025-09-30 22:20:08.988062", "step": 4977, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:09.027488", "step": 4977, "epoch": 3 }, { "type": "loss", "content": 0.0006978387827984989, "timestamp": "2025-09-30 22:20:09.039013", "step": 4978, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:09.085753", "step": 4978, "epoch": 3 }, { "type": "loss", "content": 0.0005524156731553376, "timestamp": "2025-09-30 22:20:09.098137", "step": 4979, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:09.131276", "step": 4979, "epoch": 3 }, { "type": "loss", "content": 0.00037273086491040885, "timestamp": "2025-09-30 22:20:09.166188", "step": 4980, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:09.199316", "step": 4980, "epoch": 3 }, { "type": "loss", "content": 0.00028574716998264194, "timestamp": "2025-09-30 22:20:09.210012", "step": 4981, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:09.258417", "step": 4981, "epoch": 3 }, { "type": "loss", "content": 0.001474974793381989, "timestamp": "2025-09-30 22:20:09.270690", "step": 4982, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:09.303613", "step": 4982, "epoch": 3 }, { "type": "loss", "content": 0.0012398564722388983, "timestamp": "2025-09-30 22:20:09.315884", "step": 4983, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:09.356394", "step": 4983, "epoch": 3 }, { "type": "loss", "content": 0.0007058014743961394, "timestamp": "2025-09-30 22:20:09.389849", "step": 4984, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:09.424168", "step": 4984, "epoch": 3 }, { "type": "loss", "content": 0.0010078602936118841, "timestamp": "2025-09-30 22:20:09.436862", "step": 4985, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:09.471482", "step": 4985, "epoch": 3 }, { "type": "loss", "content": 0.0005812669987790287, "timestamp": "2025-09-30 22:20:09.484021", "step": 4986, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:09.519168", "step": 4986, "epoch": 3 }, { "type": "loss", "content": 0.002003656467422843, "timestamp": "2025-09-30 22:20:09.530368", "step": 4987, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:09.568815", "step": 4987, "epoch": 3 }, { "type": "loss", "content": 0.011207995936274529, "timestamp": "2025-09-30 22:20:09.602096", "step": 4988, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:09.636364", "step": 4988, "epoch": 3 }, { "type": "loss", "content": 0.0014819592470303178, "timestamp": "2025-09-30 22:20:09.649034", "step": 4989, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:09.685931", "step": 4989, "epoch": 3 }, { "type": "loss", "content": 0.004838210996240377, "timestamp": "2025-09-30 22:20:09.699631", "step": 4990, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:09.733798", "step": 4990, "epoch": 3 }, { "type": "loss", "content": 0.00016354575927834958, "timestamp": "2025-09-30 22:20:09.746366", "step": 4991, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:09.783152", "step": 4991, "epoch": 3 }, { "type": "loss", "content": 0.0002751017455011606, "timestamp": "2025-09-30 22:20:09.816582", "step": 4992, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:09.855504", "step": 4992, "epoch": 3 }, { "type": "loss", "content": 0.001686186995357275, "timestamp": "2025-09-30 22:20:09.866405", "step": 4993, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:09.909331", "step": 4993, "epoch": 3 }, { "type": "loss", "content": 0.0007971947197802365, "timestamp": "2025-09-30 22:20:09.921978", "step": 4994, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:09.954729", "step": 4994, "epoch": 3 }, { "type": "loss", "content": 0.0003467929782345891, "timestamp": "2025-09-30 22:20:09.967319", "step": 4995, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:09.999720", "step": 4995, "epoch": 3 }, { "type": "loss", "content": 0.000309546769130975, "timestamp": "2025-09-30 22:20:10.032822", "step": 4996, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:10.078952", "step": 4996, "epoch": 3 }, { "type": "loss", "content": 0.0012426752364262938, "timestamp": "2025-09-30 22:20:10.098018", "step": 4997, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:10.145792", "step": 4997, "epoch": 3 }, { "type": "loss", "content": 0.0015795336803421378, "timestamp": "2025-09-30 22:20:10.158002", "step": 4998, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:10.210350", "step": 4998, "epoch": 3 }, { "type": "loss", "content": 0.005597970448434353, "timestamp": "2025-09-30 22:20:10.222900", "step": 4999, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:10.273464", "step": 4999, "epoch": 3 }, { "type": "loss", "content": 0.001970804063603282, "timestamp": "2025-09-30 22:20:10.305057", "step": 5000, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 5000", "timestamp": "2025-09-30 22:20:15.807087", "step": 5000, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:15.845853", "step": 5000, "epoch": 3 }, { "type": "loss", "content": 0.0008431835449300706, "timestamp": "2025-09-30 22:20:15.853094", "step": 5001, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:15.888096", "step": 5001, "epoch": 3 }, { "type": "loss", "content": 0.00022042497585061938, "timestamp": "2025-09-30 22:20:15.898327", "step": 5002, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:15.949923", "step": 5002, "epoch": 3 }, { "type": "loss", "content": 0.0012976464349776506, "timestamp": "2025-09-30 22:20:15.958730", "step": 5003, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:15.998396", "step": 5003, "epoch": 3 }, { "type": "loss", "content": 0.0027910852804780006, "timestamp": "2025-09-30 22:20:16.027833", "step": 5004, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:16.061025", "step": 5004, "epoch": 3 }, { "type": "loss", "content": 0.003565196180716157, "timestamp": "2025-09-30 22:20:16.066345", "step": 5005, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:16.104647", "step": 5005, "epoch": 3 }, { "type": "loss", "content": 0.0007262213621288538, "timestamp": "2025-09-30 22:20:16.112284", "step": 5006, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:16.151245", "step": 5006, "epoch": 3 }, { "type": "loss", "content": 0.0004352441756054759, "timestamp": "2025-09-30 22:20:16.158990", "step": 5007, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:16.199166", "step": 5007, "epoch": 3 }, { "type": "loss", "content": 0.0002115533861797303, "timestamp": "2025-09-30 22:20:16.227796", "step": 5008, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:16.264336", "step": 5008, "epoch": 3 }, { "type": "loss", "content": 0.0023786493111401796, "timestamp": "2025-09-30 22:20:16.272357", "step": 5009, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:16.311612", "step": 5009, "epoch": 3 }, { "type": "loss", "content": 0.002945238258689642, "timestamp": "2025-09-30 22:20:16.321793", "step": 5010, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:16.357312", "step": 5010, "epoch": 3 }, { "type": "loss", "content": 0.0014447633875533938, "timestamp": "2025-09-30 22:20:16.366645", "step": 5011, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:16.415344", "step": 5011, "epoch": 3 }, { "type": "loss", "content": 0.005410191603004932, "timestamp": "2025-09-30 22:20:16.445607", "step": 5012, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:16.477476", "step": 5012, "epoch": 3 }, { "type": "loss", "content": 0.0032125164289027452, "timestamp": "2025-09-30 22:20:16.483049", "step": 5013, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:20:16.514767", "step": 5013, "epoch": 3 }, { "type": "loss", "content": 0.004742915742099285, "timestamp": "2025-09-30 22:20:16.522775", "step": 5014, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:16.561876", "step": 5014, "epoch": 3 }, { "type": "loss", "content": 0.0009069226798601449, "timestamp": "2025-09-30 22:20:16.572292", "step": 5015, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:16.605187", "step": 5015, "epoch": 3 }, { "type": "loss", "content": 0.00934299360960722, "timestamp": "2025-09-30 22:20:16.638305", "step": 5016, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:16.676225", "step": 5016, "epoch": 3 }, { "type": "loss", "content": 0.003764711320400238, "timestamp": "2025-09-30 22:20:16.689222", "step": 5017, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:16.740946", "step": 5017, "epoch": 3 }, { "type": "loss", "content": 0.0002709300024434924, "timestamp": "2025-09-30 22:20:16.748265", "step": 5018, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:16.791772", "step": 5018, "epoch": 3 }, { "type": "loss", "content": 0.005229764152318239, "timestamp": "2025-09-30 22:20:16.804128", "step": 5019, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:16.841050", "step": 5019, "epoch": 3 }, { "type": "loss", "content": 0.0009594621951691806, "timestamp": "2025-09-30 22:20:16.883195", "step": 5020, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:16.915666", "step": 5020, "epoch": 3 }, { "type": "loss", "content": 0.0020179443527013063, "timestamp": "2025-09-30 22:20:16.924516", "step": 5021, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:16.981422", "step": 5021, "epoch": 3 }, { "type": "loss", "content": 0.0021082826424390078, "timestamp": "2025-09-30 22:20:16.994866", "step": 5022, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:17.027975", "step": 5022, "epoch": 3 }, { "type": "loss", "content": 0.011751977726817131, "timestamp": "2025-09-30 22:20:17.040371", "step": 5023, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:17.074558", "step": 5023, "epoch": 3 }, { "type": "loss", "content": 0.0023646140471100807, "timestamp": "2025-09-30 22:20:17.106742", "step": 5024, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:17.153377", "step": 5024, "epoch": 3 }, { "type": "loss", "content": 0.0007554754265584052, "timestamp": "2025-09-30 22:20:17.161507", "step": 5025, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:17.197600", "step": 5025, "epoch": 3 }, { "type": "loss", "content": 0.0005631324602290988, "timestamp": "2025-09-30 22:20:17.210151", "step": 5026, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:17.266489", "step": 5026, "epoch": 3 }, { "type": "loss", "content": 0.0003204508393537253, "timestamp": "2025-09-30 22:20:17.278794", "step": 5027, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:17.317502", "step": 5027, "epoch": 3 }, { "type": "loss", "content": 0.008249454200267792, "timestamp": "2025-09-30 22:20:17.350687", "step": 5028, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:17.387331", "step": 5028, "epoch": 3 }, { "type": "loss", "content": 0.005023043602705002, "timestamp": "2025-09-30 22:20:17.396077", "step": 5029, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:17.435520", "step": 5029, "epoch": 3 }, { "type": "loss", "content": 0.0007688560290262103, "timestamp": "2025-09-30 22:20:17.446811", "step": 5030, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:17.478563", "step": 5030, "epoch": 3 }, { "type": "loss", "content": 0.0028127082623541355, "timestamp": "2025-09-30 22:20:17.489162", "step": 5031, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:17.529278", "step": 5031, "epoch": 3 }, { "type": "loss", "content": 0.004154357593506575, "timestamp": "2025-09-30 22:20:17.560597", "step": 5032, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:17.598751", "step": 5032, "epoch": 3 }, { "type": "loss", "content": 0.001402303110808134, "timestamp": "2025-09-30 22:20:17.615024", "step": 5033, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:17.658997", "step": 5033, "epoch": 3 }, { "type": "loss", "content": 0.0005616036360152066, "timestamp": "2025-09-30 22:20:17.671340", "step": 5034, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:17.704018", "step": 5034, "epoch": 3 }, { "type": "loss", "content": 0.0007149986340664327, "timestamp": "2025-09-30 22:20:17.715071", "step": 5035, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:17.758087", "step": 5035, "epoch": 3 }, { "type": "loss", "content": 0.0006970600225031376, "timestamp": "2025-09-30 22:20:17.792801", "step": 5036, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:17.840948", "step": 5036, "epoch": 3 }, { "type": "loss", "content": 0.0001771541137713939, "timestamp": "2025-09-30 22:20:17.851519", "step": 5037, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:17.897262", "step": 5037, "epoch": 3 }, { "type": "loss", "content": 0.002388948807492852, "timestamp": "2025-09-30 22:20:17.905190", "step": 5038, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:17.950640", "step": 5038, "epoch": 3 }, { "type": "loss", "content": 0.010566468350589275, "timestamp": "2025-09-30 22:20:17.963228", "step": 5039, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:17.997513", "step": 5039, "epoch": 3 }, { "type": "loss", "content": 0.0237954743206501, "timestamp": "2025-09-30 22:20:18.025970", "step": 5040, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:18.067672", "step": 5040, "epoch": 3 }, { "type": "loss", "content": 0.0006434906972572207, "timestamp": "2025-09-30 22:20:18.079665", "step": 5041, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:18.121037", "step": 5041, "epoch": 3 }, { "type": "loss", "content": 0.000888917304109782, "timestamp": "2025-09-30 22:20:18.132315", "step": 5042, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:18.178891", "step": 5042, "epoch": 3 }, { "type": "loss", "content": 0.0016442594351246953, "timestamp": "2025-09-30 22:20:18.190234", "step": 5043, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:18.230054", "step": 5043, "epoch": 3 }, { "type": "loss", "content": 0.009254230186343193, "timestamp": "2025-09-30 22:20:18.258638", "step": 5044, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:18.291921", "step": 5044, "epoch": 3 }, { "type": "loss", "content": 0.00074589136056602, "timestamp": "2025-09-30 22:20:18.296473", "step": 5045, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:20:18.331178", "step": 5045, "epoch": 3 }, { "type": "loss", "content": 0.0008237607544288039, "timestamp": "2025-09-30 22:20:18.336049", "step": 5046, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:18.369174", "step": 5046, "epoch": 3 }, { "type": "loss", "content": 0.0040649957954883575, "timestamp": "2025-09-30 22:20:18.385038", "step": 5047, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:18.418435", "step": 5047, "epoch": 3 }, { "type": "loss", "content": 0.005607422906905413, "timestamp": "2025-09-30 22:20:18.449790", "step": 5048, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:18.491379", "step": 5048, "epoch": 3 }, { "type": "loss", "content": 0.0010886044474318624, "timestamp": "2025-09-30 22:20:18.503756", "step": 5049, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:18.537558", "step": 5049, "epoch": 3 }, { "type": "loss", "content": 0.002487512305378914, "timestamp": "2025-09-30 22:20:18.550094", "step": 5050, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:18.607654", "step": 5050, "epoch": 3 }, { "type": "loss", "content": 0.002762896940112114, "timestamp": "2025-09-30 22:20:18.621394", "step": 5051, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:18.663181", "step": 5051, "epoch": 3 }, { "type": "loss", "content": 0.001024669618345797, "timestamp": "2025-09-30 22:20:18.697269", "step": 5052, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:18.730431", "step": 5052, "epoch": 3 }, { "type": "loss", "content": 0.0033020072150975466, "timestamp": "2025-09-30 22:20:18.742139", "step": 5053, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:18.783491", "step": 5053, "epoch": 3 }, { "type": "loss", "content": 0.006183733697980642, "timestamp": "2025-09-30 22:20:18.795904", "step": 5054, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:18.828389", "step": 5054, "epoch": 3 }, { "type": "loss", "content": 0.0011442236136645079, "timestamp": "2025-09-30 22:20:18.839367", "step": 5055, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:18.873178", "step": 5055, "epoch": 3 }, { "type": "loss", "content": 0.003287734929472208, "timestamp": "2025-09-30 22:20:18.907436", "step": 5056, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:18.941851", "step": 5056, "epoch": 3 }, { "type": "loss", "content": 0.0019181531388312578, "timestamp": "2025-09-30 22:20:18.954859", "step": 5057, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:18.999825", "step": 5057, "epoch": 3 }, { "type": "loss", "content": 0.004124120809137821, "timestamp": "2025-09-30 22:20:19.013681", "step": 5058, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:19.050015", "step": 5058, "epoch": 3 }, { "type": "loss", "content": 0.0011872005416080356, "timestamp": "2025-09-30 22:20:19.062504", "step": 5059, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:19.104044", "step": 5059, "epoch": 3 }, { "type": "loss", "content": 0.003657316090539098, "timestamp": "2025-09-30 22:20:19.141364", "step": 5060, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:20:21.541585", "step": 5060, "epoch": 3 }, { "type": "pplx", "content": 5.869142437621591, "timestamp": "2025-09-30 22:20:21.552575", "step": 5060, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:21.584332", "step": 5060, "epoch": 3 }, { "type": "loss", "content": 0.002698945812880993, "timestamp": "2025-09-30 22:20:21.592275", "step": 5061, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:21.625848", "step": 5061, "epoch": 3 }, { "type": "loss", "content": 0.002139536663889885, "timestamp": "2025-09-30 22:20:21.638457", "step": 5062, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:21.678589", "step": 5062, "epoch": 3 }, { "type": "loss", "content": 0.0012386830057948828, "timestamp": "2025-09-30 22:20:21.688933", "step": 5063, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:21.722650", "step": 5063, "epoch": 3 }, { "type": "loss", "content": 0.0005841401871293783, "timestamp": "2025-09-30 22:20:21.754480", "step": 5064, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:21.797283", "step": 5064, "epoch": 3 }, { "type": "loss", "content": 0.0005301560158841312, "timestamp": "2025-09-30 22:20:21.807431", "step": 5065, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:21.840276", "step": 5065, "epoch": 3 }, { "type": "loss", "content": 0.005327840335667133, "timestamp": "2025-09-30 22:20:21.851464", "step": 5066, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:21.891879", "step": 5066, "epoch": 3 }, { "type": "loss", "content": 0.0005545703461393714, "timestamp": "2025-09-30 22:20:21.899460", "step": 5067, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:21.931033", "step": 5067, "epoch": 3 }, { "type": "loss", "content": 0.001500859041698277, "timestamp": "2025-09-30 22:20:21.963736", "step": 5068, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:22.004078", "step": 5068, "epoch": 3 }, { "type": "loss", "content": 0.0016331304796040058, "timestamp": "2025-09-30 22:20:22.017143", "step": 5069, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:22.057346", "step": 5069, "epoch": 3 }, { "type": "loss", "content": 0.0013913362054154277, "timestamp": "2025-09-30 22:20:22.065169", "step": 5070, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:22.107825", "step": 5070, "epoch": 3 }, { "type": "loss", "content": 0.011057810857892036, "timestamp": "2025-09-30 22:20:22.121565", "step": 5071, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:22.162276", "step": 5071, "epoch": 3 }, { "type": "loss", "content": 0.0009770106989890337, "timestamp": "2025-09-30 22:20:22.196472", "step": 5072, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:22.229855", "step": 5072, "epoch": 3 }, { "type": "loss", "content": 0.0020410018041729927, "timestamp": "2025-09-30 22:20:22.240626", "step": 5073, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:22.282091", "step": 5073, "epoch": 3 }, { "type": "loss", "content": 0.003938453271985054, "timestamp": "2025-09-30 22:20:22.295469", "step": 5074, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:22.342443", "step": 5074, "epoch": 3 }, { "type": "loss", "content": 0.00034149156999774277, "timestamp": "2025-09-30 22:20:22.356095", "step": 5075, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:22.397790", "step": 5075, "epoch": 3 }, { "type": "loss", "content": 0.0015579075552523136, "timestamp": "2025-09-30 22:20:22.429927", "step": 5076, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:22.471366", "step": 5076, "epoch": 3 }, { "type": "loss", "content": 0.0011037306394428015, "timestamp": "2025-09-30 22:20:22.481746", "step": 5077, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:22.529593", "step": 5077, "epoch": 3 }, { "type": "loss", "content": 0.002535564359277487, "timestamp": "2025-09-30 22:20:22.538953", "step": 5078, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:22.578446", "step": 5078, "epoch": 3 }, { "type": "loss", "content": 0.0018124495400115848, "timestamp": "2025-09-30 22:20:22.595330", "step": 5079, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:22.635621", "step": 5079, "epoch": 3 }, { "type": "loss", "content": 0.003327887039631605, "timestamp": "2025-09-30 22:20:22.664036", "step": 5080, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:22.704286", "step": 5080, "epoch": 3 }, { "type": "loss", "content": 0.0026340847834944725, "timestamp": "2025-09-30 22:20:22.716848", "step": 5081, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:22.750905", "step": 5081, "epoch": 3 }, { "type": "loss", "content": 0.0014675736892968416, "timestamp": "2025-09-30 22:20:22.762085", "step": 5082, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:22.800421", "step": 5082, "epoch": 3 }, { "type": "loss", "content": 0.0008998352568596601, "timestamp": "2025-09-30 22:20:22.811610", "step": 5083, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:22.855065", "step": 5083, "epoch": 3 }, { "type": "loss", "content": 0.0049322196282446384, "timestamp": "2025-09-30 22:20:22.889650", "step": 5084, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:22.933551", "step": 5084, "epoch": 3 }, { "type": "loss", "content": 0.00042208569357171655, "timestamp": "2025-09-30 22:20:22.942174", "step": 5085, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:22.975520", "step": 5085, "epoch": 3 }, { "type": "loss", "content": 0.002821327419951558, "timestamp": "2025-09-30 22:20:22.986544", "step": 5086, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:23.027136", "step": 5086, "epoch": 3 }, { "type": "loss", "content": 0.0022981923539191484, "timestamp": "2025-09-30 22:20:23.037458", "step": 5087, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:23.090413", "step": 5087, "epoch": 3 }, { "type": "loss", "content": 0.0007188955205492675, "timestamp": "2025-09-30 22:20:23.122218", "step": 5088, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:23.166275", "step": 5088, "epoch": 3 }, { "type": "loss", "content": 0.00043219528743065894, "timestamp": "2025-09-30 22:20:23.174335", "step": 5089, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:23.212596", "step": 5089, "epoch": 3 }, { "type": "loss", "content": 0.0014171154471114278, "timestamp": "2025-09-30 22:20:23.222844", "step": 5090, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:23.257514", "step": 5090, "epoch": 3 }, { "type": "loss", "content": 0.0023080131504684687, "timestamp": "2025-09-30 22:20:23.270766", "step": 5091, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:23.317405", "step": 5091, "epoch": 3 }, { "type": "loss", "content": 0.002961727324873209, "timestamp": "2025-09-30 22:20:23.347894", "step": 5092, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:23.381768", "step": 5092, "epoch": 3 }, { "type": "loss", "content": 0.0007711683865636587, "timestamp": "2025-09-30 22:20:23.394422", "step": 5093, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:23.431988", "step": 5093, "epoch": 3 }, { "type": "loss", "content": 0.002112046116963029, "timestamp": "2025-09-30 22:20:23.441171", "step": 5094, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:23.481488", "step": 5094, "epoch": 3 }, { "type": "loss", "content": 0.0012389529729261994, "timestamp": "2025-09-30 22:20:23.492530", "step": 5095, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:23.527565", "step": 5095, "epoch": 3 }, { "type": "loss", "content": 0.006873283069580793, "timestamp": "2025-09-30 22:20:23.561001", "step": 5096, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:23.601288", "step": 5096, "epoch": 3 }, { "type": "loss", "content": 0.0029062642715871334, "timestamp": "2025-09-30 22:20:23.609892", "step": 5097, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:23.641598", "step": 5097, "epoch": 3 }, { "type": "loss", "content": 0.004804032389074564, "timestamp": "2025-09-30 22:20:23.653875", "step": 5098, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:23.694926", "step": 5098, "epoch": 3 }, { "type": "loss", "content": 0.001358595909550786, "timestamp": "2025-09-30 22:20:23.702702", "step": 5099, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:23.742504", "step": 5099, "epoch": 3 }, { "type": "loss", "content": 0.0027899867855012417, "timestamp": "2025-09-30 22:20:23.773952", "step": 5100, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:23.821382", "step": 5100, "epoch": 3 }, { "type": "loss", "content": 0.0014267588267102838, "timestamp": "2025-09-30 22:20:23.831676", "step": 5101, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:23.872720", "step": 5101, "epoch": 3 }, { "type": "loss", "content": 0.0021460719872266054, "timestamp": "2025-09-30 22:20:23.885084", "step": 5102, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:23.922788", "step": 5102, "epoch": 3 }, { "type": "loss", "content": 0.0053979563526809216, "timestamp": "2025-09-30 22:20:23.936097", "step": 5103, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:23.997633", "step": 5103, "epoch": 3 }, { "type": "loss", "content": 0.0006758447270840406, "timestamp": "2025-09-30 22:20:24.031545", "step": 5104, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:24.078213", "step": 5104, "epoch": 3 }, { "type": "loss", "content": 0.0037395928520709276, "timestamp": "2025-09-30 22:20:24.084524", "step": 5105, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:24.121548", "step": 5105, "epoch": 3 }, { "type": "loss", "content": 0.00035706738708540797, "timestamp": "2025-09-30 22:20:24.139222", "step": 5106, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:24.191593", "step": 5106, "epoch": 3 }, { "type": "loss", "content": 0.002709871158003807, "timestamp": "2025-09-30 22:20:24.198911", "step": 5107, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:24.243361", "step": 5107, "epoch": 3 }, { "type": "loss", "content": 0.0020465017296373844, "timestamp": "2025-09-30 22:20:24.278108", "step": 5108, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:24.315975", "step": 5108, "epoch": 3 }, { "type": "loss", "content": 0.001779739512130618, "timestamp": "2025-09-30 22:20:24.324723", "step": 5109, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:24.356177", "step": 5109, "epoch": 3 }, { "type": "loss", "content": 0.0033832560293376446, "timestamp": "2025-09-30 22:20:24.364232", "step": 5110, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:24.395781", "step": 5110, "epoch": 3 }, { "type": "loss", "content": 0.002700228476896882, "timestamp": "2025-09-30 22:20:24.406780", "step": 5111, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:24.439400", "step": 5111, "epoch": 3 }, { "type": "loss", "content": 0.0015108698280528188, "timestamp": "2025-09-30 22:20:24.467535", "step": 5112, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:24.502587", "step": 5112, "epoch": 3 }, { "type": "loss", "content": 0.0027247534599155188, "timestamp": "2025-09-30 22:20:24.507447", "step": 5113, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:24.539150", "step": 5113, "epoch": 3 }, { "type": "loss", "content": 0.0008614298421889544, "timestamp": "2025-09-30 22:20:24.546106", "step": 5114, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:20:24.581046", "step": 5114, "epoch": 3 }, { "type": "loss", "content": 0.0016441134503111243, "timestamp": "2025-09-30 22:20:24.589468", "step": 5115, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:24.621617", "step": 5115, "epoch": 3 }, { "type": "loss", "content": 0.0015888211783021688, "timestamp": "2025-09-30 22:20:24.652777", "step": 5116, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:24.685887", "step": 5116, "epoch": 3 }, { "type": "loss", "content": 0.0004532829625532031, "timestamp": "2025-09-30 22:20:24.696576", "step": 5117, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:24.729373", "step": 5117, "epoch": 3 }, { "type": "loss", "content": 0.009183548390865326, "timestamp": "2025-09-30 22:20:24.737259", "step": 5118, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:20:24.784572", "step": 5118, "epoch": 3 }, { "type": "loss", "content": 0.0008579294662922621, "timestamp": "2025-09-30 22:20:24.789141", "step": 5119, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:24.830750", "step": 5119, "epoch": 3 }, { "type": "loss", "content": 0.003431705292314291, "timestamp": "2025-09-30 22:20:24.859252", "step": 5120, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:20:24.891755", "step": 5120, "epoch": 3 }, { "type": "loss", "content": 0.00036426790757104754, "timestamp": "2025-09-30 22:20:24.898406", "step": 5121, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:24.931043", "step": 5121, "epoch": 3 }, { "type": "loss", "content": 0.0005698530003428459, "timestamp": "2025-09-30 22:20:24.939405", "step": 5122, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:24.976596", "step": 5122, "epoch": 3 }, { "type": "loss", "content": 0.003810714930295944, "timestamp": "2025-09-30 22:20:24.986954", "step": 5123, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:25.026821", "step": 5123, "epoch": 3 }, { "type": "loss", "content": 0.0008139772689901292, "timestamp": "2025-09-30 22:20:25.058186", "step": 5124, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:25.095840", "step": 5124, "epoch": 3 }, { "type": "loss", "content": 0.0010362503817304969, "timestamp": "2025-09-30 22:20:25.101423", "step": 5125, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:25.134031", "step": 5125, "epoch": 3 }, { "type": "loss", "content": 0.0014636110281571746, "timestamp": "2025-09-30 22:20:25.144591", "step": 5126, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:25.180560", "step": 5126, "epoch": 3 }, { "type": "loss", "content": 0.00022271781926974654, "timestamp": "2025-09-30 22:20:25.191807", "step": 5127, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:25.224558", "step": 5127, "epoch": 3 }, { "type": "loss", "content": 8.554430678486824e-05, "timestamp": "2025-09-30 22:20:25.253444", "step": 5128, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:25.286751", "step": 5128, "epoch": 3 }, { "type": "loss", "content": 0.005259825848042965, "timestamp": "2025-09-30 22:20:25.291861", "step": 5129, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:25.329086", "step": 5129, "epoch": 3 }, { "type": "loss", "content": 0.0031255753710865974, "timestamp": "2025-09-30 22:20:25.337088", "step": 5130, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:25.376168", "step": 5130, "epoch": 3 }, { "type": "loss", "content": 0.006884410977363586, "timestamp": "2025-09-30 22:20:25.384897", "step": 5131, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:25.425789", "step": 5131, "epoch": 3 }, { "type": "loss", "content": 0.00804687850177288, "timestamp": "2025-09-30 22:20:25.458904", "step": 5132, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:25.510823", "step": 5132, "epoch": 3 }, { "type": "loss", "content": 0.0006524841883219779, "timestamp": "2025-09-30 22:20:25.517170", "step": 5133, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:25.560331", "step": 5133, "epoch": 3 }, { "type": "loss", "content": 0.0006048069335520267, "timestamp": "2025-09-30 22:20:25.568176", "step": 5134, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:25.607791", "step": 5134, "epoch": 3 }, { "type": "loss", "content": 8.057076775003225e-05, "timestamp": "2025-09-30 22:20:25.621117", "step": 5135, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:25.657377", "step": 5135, "epoch": 3 }, { "type": "loss", "content": 0.002809246303513646, "timestamp": "2025-09-30 22:20:25.686950", "step": 5136, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:25.721322", "step": 5136, "epoch": 3 }, { "type": "loss", "content": 0.0012514720438048244, "timestamp": "2025-09-30 22:20:25.732114", "step": 5137, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:25.775466", "step": 5137, "epoch": 3 }, { "type": "loss", "content": 0.00027411506744101644, "timestamp": "2025-09-30 22:20:25.789311", "step": 5138, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:25.837741", "step": 5138, "epoch": 3 }, { "type": "loss", "content": 0.004303140100091696, "timestamp": "2025-09-30 22:20:25.851174", "step": 5139, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:25.894558", "step": 5139, "epoch": 3 }, { "type": "loss", "content": 0.0010680407285690308, "timestamp": "2025-09-30 22:20:25.922570", "step": 5140, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:20:25.954086", "step": 5140, "epoch": 3 }, { "type": "loss", "content": 0.0021734959445893764, "timestamp": "2025-09-30 22:20:25.957403", "step": 5141, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:25.990150", "step": 5141, "epoch": 3 }, { "type": "loss", "content": 0.0010639060055837035, "timestamp": "2025-09-30 22:20:25.997226", "step": 5142, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:26.031424", "step": 5142, "epoch": 3 }, { "type": "loss", "content": 0.0004178355447947979, "timestamp": "2025-09-30 22:20:26.039293", "step": 5143, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:26.087030", "step": 5143, "epoch": 3 }, { "type": "loss", "content": 0.00021914293756708503, "timestamp": "2025-09-30 22:20:26.118064", "step": 5144, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:26.154458", "step": 5144, "epoch": 3 }, { "type": "loss", "content": 0.00019750260980799794, "timestamp": "2025-09-30 22:20:26.162865", "step": 5145, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:26.195393", "step": 5145, "epoch": 3 }, { "type": "loss", "content": 0.0038122800178825855, "timestamp": "2025-09-30 22:20:26.205844", "step": 5146, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-30 22:20:26.242585", "step": 5146, "epoch": 3 }, { "type": "loss", "content": 0.0025838292203843594, "timestamp": "2025-09-30 22:20:26.250123", "step": 5147, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:26.288272", "step": 5147, "epoch": 3 }, { "type": "loss", "content": 0.0002004976267926395, "timestamp": "2025-09-30 22:20:26.317461", "step": 5148, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:26.350936", "step": 5148, "epoch": 3 }, { "type": "loss", "content": 0.009289993904531002, "timestamp": "2025-09-30 22:20:26.360059", "step": 5149, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:26.417430", "step": 5149, "epoch": 3 }, { "type": "loss", "content": 0.0010501905344426632, "timestamp": "2025-09-30 22:20:26.425310", "step": 5150, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:26.460385", "step": 5150, "epoch": 3 }, { "type": "loss", "content": 5.500621045939624e-05, "timestamp": "2025-09-30 22:20:26.471432", "step": 5151, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:26.511083", "step": 5151, "epoch": 3 }, { "type": "loss", "content": 0.0026809093542397022, "timestamp": "2025-09-30 22:20:26.540146", "step": 5152, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:26.572819", "step": 5152, "epoch": 3 }, { "type": "loss", "content": 0.001439956366084516, "timestamp": "2025-09-30 22:20:26.582770", "step": 5153, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:26.621925", "step": 5153, "epoch": 3 }, { "type": "loss", "content": 0.00030368121224455535, "timestamp": "2025-09-30 22:20:26.629435", "step": 5154, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:26.670075", "step": 5154, "epoch": 3 }, { "type": "loss", "content": 0.0015567500377073884, "timestamp": "2025-09-30 22:20:26.682712", "step": 5155, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:26.723354", "step": 5155, "epoch": 3 }, { "type": "loss", "content": 0.0007795571000315249, "timestamp": "2025-09-30 22:20:26.756592", "step": 5156, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:26.799544", "step": 5156, "epoch": 3 }, { "type": "loss", "content": 0.0017613848904147744, "timestamp": "2025-09-30 22:20:26.808727", "step": 5157, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:26.848721", "step": 5157, "epoch": 3 }, { "type": "loss", "content": 0.010004185140132904, "timestamp": "2025-09-30 22:20:26.859698", "step": 5158, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:26.894196", "step": 5158, "epoch": 3 }, { "type": "loss", "content": 0.00019910503760911524, "timestamp": "2025-09-30 22:20:26.902087", "step": 5159, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:26.941508", "step": 5159, "epoch": 3 }, { "type": "loss", "content": 0.0004115682386327535, "timestamp": "2025-09-30 22:20:26.973178", "step": 5160, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:27.010989", "step": 5160, "epoch": 3 }, { "type": "loss", "content": 0.001299984403885901, "timestamp": "2025-09-30 22:20:27.015898", "step": 5161, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:27.057827", "step": 5161, "epoch": 3 }, { "type": "loss", "content": 0.0034290680196136236, "timestamp": "2025-09-30 22:20:27.069564", "step": 5162, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:27.103709", "step": 5162, "epoch": 3 }, { "type": "loss", "content": 0.0007851793197914958, "timestamp": "2025-09-30 22:20:27.114069", "step": 5163, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:27.148201", "step": 5163, "epoch": 3 }, { "type": "loss", "content": 0.00461316155269742, "timestamp": "2025-09-30 22:20:27.181044", "step": 5164, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:27.214486", "step": 5164, "epoch": 3 }, { "type": "loss", "content": 0.00031694734934717417, "timestamp": "2025-09-30 22:20:27.222420", "step": 5165, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:27.255503", "step": 5165, "epoch": 3 }, { "type": "loss", "content": 0.0018249392742291093, "timestamp": "2025-09-30 22:20:27.263099", "step": 5166, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:27.306568", "step": 5166, "epoch": 3 }, { "type": "loss", "content": 0.00027169540408067405, "timestamp": "2025-09-30 22:20:27.314474", "step": 5167, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:27.360754", "step": 5167, "epoch": 3 }, { "type": "loss", "content": 0.0006059342413209379, "timestamp": "2025-09-30 22:20:27.391899", "step": 5168, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:27.438306", "step": 5168, "epoch": 3 }, { "type": "loss", "content": 0.024362290278077126, "timestamp": "2025-09-30 22:20:27.447068", "step": 5169, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:27.484372", "step": 5169, "epoch": 3 }, { "type": "loss", "content": 0.000262056099018082, "timestamp": "2025-09-30 22:20:27.495393", "step": 5170, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:27.531964", "step": 5170, "epoch": 3 }, { "type": "loss", "content": 0.0019671518821269274, "timestamp": "2025-09-30 22:20:27.543174", "step": 5171, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:27.583221", "step": 5171, "epoch": 3 }, { "type": "loss", "content": 0.00355559797026217, "timestamp": "2025-09-30 22:20:27.617755", "step": 5172, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:27.657834", "step": 5172, "epoch": 3 }, { "type": "loss", "content": 0.0005548345507122576, "timestamp": "2025-09-30 22:20:27.670882", "step": 5173, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:27.713145", "step": 5173, "epoch": 3 }, { "type": "loss", "content": 0.0007509011193178594, "timestamp": "2025-09-30 22:20:27.720494", "step": 5174, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:27.773740", "step": 5174, "epoch": 3 }, { "type": "loss", "content": 0.0009994101710617542, "timestamp": "2025-09-30 22:20:27.786281", "step": 5175, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:20:30.397236", "step": 5175, "epoch": 3 }, { "type": "pplx", "content": 5.938044559665769, "timestamp": "2025-09-30 22:20:30.406806", "step": 5175, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:20:30.457948", "step": 5175, "epoch": 3 }, { "type": "loss", "content": 0.0033478126861155033, "timestamp": "2025-09-30 22:20:30.496544", "step": 5176, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:30.534459", "step": 5176, "epoch": 3 }, { "type": "loss", "content": 0.006446576677262783, "timestamp": "2025-09-30 22:20:30.544136", "step": 5177, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:30.579979", "step": 5177, "epoch": 3 }, { "type": "loss", "content": 0.00023951790353748947, "timestamp": "2025-09-30 22:20:30.592285", "step": 5178, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:30.625485", "step": 5178, "epoch": 3 }, { "type": "loss", "content": 0.00028509373078122735, "timestamp": "2025-09-30 22:20:30.635754", "step": 5179, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:30.672135", "step": 5179, "epoch": 3 }, { "type": "loss", "content": 0.001627097255550325, "timestamp": "2025-09-30 22:20:30.703372", "step": 5180, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:30.736927", "step": 5180, "epoch": 3 }, { "type": "loss", "content": 0.016529927030205727, "timestamp": "2025-09-30 22:20:30.742622", "step": 5181, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:30.778174", "step": 5181, "epoch": 3 }, { "type": "loss", "content": 0.00010932084842352197, "timestamp": "2025-09-30 22:20:30.786223", "step": 5182, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:30.824697", "step": 5182, "epoch": 3 }, { "type": "loss", "content": 0.008478675037622452, "timestamp": "2025-09-30 22:20:30.835729", "step": 5183, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:30.885503", "step": 5183, "epoch": 3 }, { "type": "loss", "content": 0.0004986929707229137, "timestamp": "2025-09-30 22:20:30.916590", "step": 5184, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:30.955441", "step": 5184, "epoch": 3 }, { "type": "loss", "content": 0.0013999422080814838, "timestamp": "2025-09-30 22:20:30.968507", "step": 5185, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:31.017174", "step": 5185, "epoch": 3 }, { "type": "loss", "content": 0.0019102203659713268, "timestamp": "2025-09-30 22:20:31.024784", "step": 5186, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:31.058202", "step": 5186, "epoch": 3 }, { "type": "loss", "content": 0.002953177783638239, "timestamp": "2025-09-30 22:20:31.065633", "step": 5187, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:31.101080", "step": 5187, "epoch": 3 }, { "type": "loss", "content": 0.012278086505830288, "timestamp": "2025-09-30 22:20:31.132862", "step": 5188, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:31.166258", "step": 5188, "epoch": 3 }, { "type": "loss", "content": 0.002866664668545127, "timestamp": "2025-09-30 22:20:31.174860", "step": 5189, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:31.208799", "step": 5189, "epoch": 3 }, { "type": "loss", "content": 0.005537381861358881, "timestamp": "2025-09-30 22:20:31.221348", "step": 5190, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:31.265227", "step": 5190, "epoch": 3 }, { "type": "loss", "content": 0.0009773658821359277, "timestamp": "2025-09-30 22:20:31.278516", "step": 5191, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:31.318770", "step": 5191, "epoch": 3 }, { "type": "loss", "content": 0.00014409396681003273, "timestamp": "2025-09-30 22:20:31.349869", "step": 5192, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:31.382776", "step": 5192, "epoch": 3 }, { "type": "loss", "content": 0.005357799585908651, "timestamp": "2025-09-30 22:20:31.391287", "step": 5193, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:31.437125", "step": 5193, "epoch": 3 }, { "type": "loss", "content": 0.0030632750131189823, "timestamp": "2025-09-30 22:20:31.448159", "step": 5194, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:31.484449", "step": 5194, "epoch": 3 }, { "type": "loss", "content": 0.0006114967400208116, "timestamp": "2025-09-30 22:20:31.491706", "step": 5195, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:31.540795", "step": 5195, "epoch": 3 }, { "type": "loss", "content": 0.0019180022645741701, "timestamp": "2025-09-30 22:20:31.569389", "step": 5196, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:31.603603", "step": 5196, "epoch": 3 }, { "type": "loss", "content": 0.00090098223881796, "timestamp": "2025-09-30 22:20:31.616585", "step": 5197, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:31.659672", "step": 5197, "epoch": 3 }, { "type": "loss", "content": 0.0028920513577759266, "timestamp": "2025-09-30 22:20:31.670691", "step": 5198, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:31.707723", "step": 5198, "epoch": 3 }, { "type": "loss", "content": 0.0017432866152375937, "timestamp": "2025-09-30 22:20:31.718872", "step": 5199, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:31.750195", "step": 5199, "epoch": 3 }, { "type": "loss", "content": 0.0003439671127125621, "timestamp": "2025-09-30 22:20:31.781536", "step": 5200, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:31.815571", "step": 5200, "epoch": 3 }, { "type": "loss", "content": 0.005172444973140955, "timestamp": "2025-09-30 22:20:31.825574", "step": 5201, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:31.859713", "step": 5201, "epoch": 3 }, { "type": "loss", "content": 0.0005199200822971761, "timestamp": "2025-09-30 22:20:31.872023", "step": 5202, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:31.922284", "step": 5202, "epoch": 3 }, { "type": "loss", "content": 0.0014771092683076859, "timestamp": "2025-09-30 22:20:31.930093", "step": 5203, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:31.963629", "step": 5203, "epoch": 3 }, { "type": "loss", "content": 0.003369405400007963, "timestamp": "2025-09-30 22:20:31.995512", "step": 5204, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:32.028677", "step": 5204, "epoch": 3 }, { "type": "loss", "content": 0.0018177288584411144, "timestamp": "2025-09-30 22:20:32.036563", "step": 5205, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:32.072676", "step": 5205, "epoch": 3 }, { "type": "loss", "content": 0.0003869224456138909, "timestamp": "2025-09-30 22:20:32.080301", "step": 5206, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:32.113509", "step": 5206, "epoch": 3 }, { "type": "loss", "content": 0.0002616798155941069, "timestamp": "2025-09-30 22:20:32.120523", "step": 5207, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:32.155512", "step": 5207, "epoch": 3 }, { "type": "loss", "content": 0.003533238312229514, "timestamp": "2025-09-30 22:20:32.184369", "step": 5208, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:32.223681", "step": 5208, "epoch": 3 }, { "type": "loss", "content": 0.002410394372418523, "timestamp": "2025-09-30 22:20:32.228986", "step": 5209, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:32.260956", "step": 5209, "epoch": 3 }, { "type": "loss", "content": 0.0016966222319751978, "timestamp": "2025-09-30 22:20:32.273316", "step": 5210, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:32.309486", "step": 5210, "epoch": 3 }, { "type": "loss", "content": 0.0007273682276718318, "timestamp": "2025-09-30 22:20:32.322850", "step": 5211, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:32.364176", "step": 5211, "epoch": 3 }, { "type": "loss", "content": 0.000545423710718751, "timestamp": "2025-09-30 22:20:32.393092", "step": 5212, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:32.429172", "step": 5212, "epoch": 3 }, { "type": "loss", "content": 0.0007850287365727127, "timestamp": "2025-09-30 22:20:32.434818", "step": 5213, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:32.475036", "step": 5213, "epoch": 3 }, { "type": "loss", "content": 0.0016018090536817908, "timestamp": "2025-09-30 22:20:32.478737", "step": 5214, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:32.512436", "step": 5214, "epoch": 3 }, { "type": "loss", "content": 0.00016927003161981702, "timestamp": "2025-09-30 22:20:32.523527", "step": 5215, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:32.558872", "step": 5215, "epoch": 3 }, { "type": "loss", "content": 0.005434432066977024, "timestamp": "2025-09-30 22:20:32.593075", "step": 5216, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:32.629591", "step": 5216, "epoch": 3 }, { "type": "loss", "content": 0.0004774282278958708, "timestamp": "2025-09-30 22:20:32.637430", "step": 5217, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:32.673076", "step": 5217, "epoch": 3 }, { "type": "loss", "content": 0.0003328354796394706, "timestamp": "2025-09-30 22:20:32.684226", "step": 5218, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:32.726073", "step": 5218, "epoch": 3 }, { "type": "loss", "content": 0.000514206534717232, "timestamp": "2025-09-30 22:20:32.739686", "step": 5219, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:32.776011", "step": 5219, "epoch": 3 }, { "type": "loss", "content": 0.00044423979124985635, "timestamp": "2025-09-30 22:20:32.809413", "step": 5220, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:32.846641", "step": 5220, "epoch": 3 }, { "type": "loss", "content": 0.0009071379899978638, "timestamp": "2025-09-30 22:20:32.855464", "step": 5221, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:32.889382", "step": 5221, "epoch": 3 }, { "type": "loss", "content": 0.00041850502020679414, "timestamp": "2025-09-30 22:20:32.901927", "step": 5222, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:32.935762", "step": 5222, "epoch": 3 }, { "type": "loss", "content": 0.002450287574902177, "timestamp": "2025-09-30 22:20:32.946753", "step": 5223, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:32.985650", "step": 5223, "epoch": 3 }, { "type": "loss", "content": 0.0014607037883251905, "timestamp": "2025-09-30 22:20:33.019133", "step": 5224, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:33.057776", "step": 5224, "epoch": 3 }, { "type": "loss", "content": 0.0015632001450285316, "timestamp": "2025-09-30 22:20:33.070439", "step": 5225, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:33.110415", "step": 5225, "epoch": 3 }, { "type": "loss", "content": 0.0019418747397139668, "timestamp": "2025-09-30 22:20:33.122403", "step": 5226, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:33.159264", "step": 5226, "epoch": 3 }, { "type": "loss", "content": 0.00225188210606575, "timestamp": "2025-09-30 22:20:33.171790", "step": 5227, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:33.205574", "step": 5227, "epoch": 3 }, { "type": "loss", "content": 0.0004304961476009339, "timestamp": "2025-09-30 22:20:33.238799", "step": 5228, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:33.275459", "step": 5228, "epoch": 3 }, { "type": "loss", "content": 0.007783898152410984, "timestamp": "2025-09-30 22:20:33.283634", "step": 5229, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:33.324598", "step": 5229, "epoch": 3 }, { "type": "loss", "content": 0.000680253840982914, "timestamp": "2025-09-30 22:20:33.338330", "step": 5230, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:33.374389", "step": 5230, "epoch": 3 }, { "type": "loss", "content": 0.008012272417545319, "timestamp": "2025-09-30 22:20:33.386779", "step": 5231, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:33.422279", "step": 5231, "epoch": 3 }, { "type": "loss", "content": 0.001280724536627531, "timestamp": "2025-09-30 22:20:33.455439", "step": 5232, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:33.497268", "step": 5232, "epoch": 3 }, { "type": "loss", "content": 0.0010424808133393526, "timestamp": "2025-09-30 22:20:33.507103", "step": 5233, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:33.547504", "step": 5233, "epoch": 3 }, { "type": "loss", "content": 0.00045934141962789, "timestamp": "2025-09-30 22:20:33.560896", "step": 5234, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:33.597460", "step": 5234, "epoch": 3 }, { "type": "loss", "content": 0.008249049074947834, "timestamp": "2025-09-30 22:20:33.609585", "step": 5235, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:33.643445", "step": 5235, "epoch": 3 }, { "type": "loss", "content": 0.0005358067573979497, "timestamp": "2025-09-30 22:20:33.676420", "step": 5236, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:33.712599", "step": 5236, "epoch": 3 }, { "type": "loss", "content": 0.0032138959504663944, "timestamp": "2025-09-30 22:20:33.725288", "step": 5237, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:33.761490", "step": 5237, "epoch": 3 }, { "type": "loss", "content": 0.00473925331607461, "timestamp": "2025-09-30 22:20:33.774046", "step": 5238, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:33.816568", "step": 5238, "epoch": 3 }, { "type": "loss", "content": 0.0025055333971977234, "timestamp": "2025-09-30 22:20:33.827764", "step": 5239, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:33.864876", "step": 5239, "epoch": 3 }, { "type": "loss", "content": 0.00852253008633852, "timestamp": "2025-09-30 22:20:33.899156", "step": 5240, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:33.932394", "step": 5240, "epoch": 3 }, { "type": "loss", "content": 0.014438341371715069, "timestamp": "2025-09-30 22:20:33.943061", "step": 5241, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:33.998064", "step": 5241, "epoch": 3 }, { "type": "loss", "content": 0.0006247243145480752, "timestamp": "2025-09-30 22:20:34.009118", "step": 5242, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:34.045474", "step": 5242, "epoch": 3 }, { "type": "loss", "content": 0.0036119078285992146, "timestamp": "2025-09-30 22:20:34.058814", "step": 5243, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:34.101596", "step": 5243, "epoch": 3 }, { "type": "loss", "content": 0.005055837798863649, "timestamp": "2025-09-30 22:20:34.130365", "step": 5244, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:34.165120", "step": 5244, "epoch": 3 }, { "type": "loss", "content": 0.0008276871521957219, "timestamp": "2025-09-30 22:20:34.173174", "step": 5245, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:34.218116", "step": 5245, "epoch": 3 }, { "type": "loss", "content": 0.001044438686221838, "timestamp": "2025-09-30 22:20:34.229154", "step": 5246, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:34.264786", "step": 5246, "epoch": 3 }, { "type": "loss", "content": 0.0003220552462153137, "timestamp": "2025-09-30 22:20:34.278504", "step": 5247, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:34.330796", "step": 5247, "epoch": 3 }, { "type": "loss", "content": 0.0025070966221392155, "timestamp": "2025-09-30 22:20:34.359689", "step": 5248, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:34.412179", "step": 5248, "epoch": 3 }, { "type": "loss", "content": 0.0007652582135051489, "timestamp": "2025-09-30 22:20:34.420284", "step": 5249, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:20:34.466731", "step": 5249, "epoch": 3 }, { "type": "loss", "content": 0.001774575561285019, "timestamp": "2025-09-30 22:20:34.480719", "step": 5250, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:34.514465", "step": 5250, "epoch": 3 }, { "type": "loss", "content": 0.02314218133687973, "timestamp": "2025-09-30 22:20:34.525574", "step": 5251, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:34.571285", "step": 5251, "epoch": 3 }, { "type": "loss", "content": 0.007349980063736439, "timestamp": "2025-09-30 22:20:34.602497", "step": 5252, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:34.646748", "step": 5252, "epoch": 3 }, { "type": "loss", "content": 0.0015652753645554185, "timestamp": "2025-09-30 22:20:34.655354", "step": 5253, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:34.695670", "step": 5253, "epoch": 3 }, { "type": "loss", "content": 0.0008087062160484493, "timestamp": "2025-09-30 22:20:34.708004", "step": 5254, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:34.746280", "step": 5254, "epoch": 3 }, { "type": "loss", "content": 0.006365319713950157, "timestamp": "2025-09-30 22:20:34.759676", "step": 5255, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:34.796812", "step": 5255, "epoch": 3 }, { "type": "loss", "content": 0.0005592239904217422, "timestamp": "2025-09-30 22:20:34.828055", "step": 5256, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:34.860427", "step": 5256, "epoch": 3 }, { "type": "loss", "content": 0.0007176320650614798, "timestamp": "2025-09-30 22:20:34.868293", "step": 5257, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:34.906279", "step": 5257, "epoch": 3 }, { "type": "loss", "content": 0.0006524588097818196, "timestamp": "2025-09-30 22:20:34.918829", "step": 5258, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:34.956194", "step": 5258, "epoch": 3 }, { "type": "loss", "content": 0.0019349107751622796, "timestamp": "2025-09-30 22:20:34.967311", "step": 5259, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:35.004851", "step": 5259, "epoch": 3 }, { "type": "loss", "content": 0.004506261087954044, "timestamp": "2025-09-30 22:20:35.038294", "step": 5260, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:35.075235", "step": 5260, "epoch": 3 }, { "type": "loss", "content": 0.0013760910369455814, "timestamp": "2025-09-30 22:20:35.085792", "step": 5261, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:35.124341", "step": 5261, "epoch": 3 }, { "type": "loss", "content": 0.002927417866885662, "timestamp": "2025-09-30 22:20:35.136755", "step": 5262, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:35.177175", "step": 5262, "epoch": 3 }, { "type": "loss", "content": 0.0003989685792475939, "timestamp": "2025-09-30 22:20:35.189530", "step": 5263, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:35.237739", "step": 5263, "epoch": 3 }, { "type": "loss", "content": 0.000184141201316379, "timestamp": "2025-09-30 22:20:35.272385", "step": 5264, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:35.312270", "step": 5264, "epoch": 3 }, { "type": "loss", "content": 0.0003238429198972881, "timestamp": "2025-09-30 22:20:35.324865", "step": 5265, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:20:35.371176", "step": 5265, "epoch": 3 }, { "type": "loss", "content": 9.319038508692756e-05, "timestamp": "2025-09-30 22:20:35.387082", "step": 5266, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:20:35.430175", "step": 5266, "epoch": 3 }, { "type": "loss", "content": 0.0006466032355092466, "timestamp": "2025-09-30 22:20:35.445799", "step": 5267, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:35.502586", "step": 5267, "epoch": 3 }, { "type": "loss", "content": 0.004853468853980303, "timestamp": "2025-09-30 22:20:35.537100", "step": 5268, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:35.569918", "step": 5268, "epoch": 3 }, { "type": "loss", "content": 0.010006862692534924, "timestamp": "2025-09-30 22:20:35.578581", "step": 5269, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:35.616650", "step": 5269, "epoch": 3 }, { "type": "loss", "content": 0.0016347052296623588, "timestamp": "2025-09-30 22:20:35.627806", "step": 5270, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:35.661170", "step": 5270, "epoch": 3 }, { "type": "loss", "content": 0.000581001047976315, "timestamp": "2025-09-30 22:20:35.671529", "step": 5271, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:35.706630", "step": 5271, "epoch": 3 }, { "type": "loss", "content": 0.0010013194987550378, "timestamp": "2025-09-30 22:20:35.740109", "step": 5272, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:35.773585", "step": 5272, "epoch": 3 }, { "type": "loss", "content": 0.007017929572612047, "timestamp": "2025-09-30 22:20:35.781484", "step": 5273, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:35.826335", "step": 5273, "epoch": 3 }, { "type": "loss", "content": 0.00020266005594749004, "timestamp": "2025-09-30 22:20:35.862472", "step": 5274, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:35.917093", "step": 5274, "epoch": 3 }, { "type": "loss", "content": 0.00014247871877159923, "timestamp": "2025-09-30 22:20:35.927471", "step": 5275, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:35.964544", "step": 5275, "epoch": 3 }, { "type": "loss", "content": 0.0002896743535529822, "timestamp": "2025-09-30 22:20:35.997994", "step": 5276, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:36.033731", "step": 5276, "epoch": 3 }, { "type": "loss", "content": 0.0008965826709754765, "timestamp": "2025-09-30 22:20:36.046395", "step": 5277, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:36.084641", "step": 5277, "epoch": 3 }, { "type": "loss", "content": 0.0016090476419776678, "timestamp": "2025-09-30 22:20:36.095641", "step": 5278, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:36.146740", "step": 5278, "epoch": 3 }, { "type": "loss", "content": 0.0015315264463424683, "timestamp": "2025-09-30 22:20:36.154395", "step": 5279, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:36.191425", "step": 5279, "epoch": 3 }, { "type": "loss", "content": 0.0013392592081800103, "timestamp": "2025-09-30 22:20:36.222347", "step": 5280, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:36.266446", "step": 5280, "epoch": 3 }, { "type": "loss", "content": 0.00462631369009614, "timestamp": "2025-09-30 22:20:36.275215", "step": 5281, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:36.311795", "step": 5281, "epoch": 3 }, { "type": "loss", "content": 0.00020836050680372864, "timestamp": "2025-09-30 22:20:36.324124", "step": 5282, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:36.365008", "step": 5282, "epoch": 3 }, { "type": "loss", "content": 0.0002244135393993929, "timestamp": "2025-09-30 22:20:36.378865", "step": 5283, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:36.422522", "step": 5283, "epoch": 3 }, { "type": "loss", "content": 0.0002135797985829413, "timestamp": "2025-09-30 22:20:36.451273", "step": 5284, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:36.488841", "step": 5284, "epoch": 3 }, { "type": "loss", "content": 0.0008744591032154858, "timestamp": "2025-09-30 22:20:36.498808", "step": 5285, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:36.542770", "step": 5285, "epoch": 3 }, { "type": "loss", "content": 0.008620786480605602, "timestamp": "2025-09-30 22:20:36.556629", "step": 5286, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:36.594477", "step": 5286, "epoch": 3 }, { "type": "loss", "content": 0.00015636156604159623, "timestamp": "2025-09-30 22:20:36.604903", "step": 5287, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:36.646000", "step": 5287, "epoch": 3 }, { "type": "loss", "content": 0.0020843285601586103, "timestamp": "2025-09-30 22:20:36.679184", "step": 5288, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:20:36.730250", "step": 5288, "epoch": 3 }, { "type": "loss", "content": 0.004855290055274963, "timestamp": "2025-09-30 22:20:36.748651", "step": 5289, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:36.797612", "step": 5289, "epoch": 3 }, { "type": "loss", "content": 0.0006399782723747194, "timestamp": "2025-09-30 22:20:36.813054", "step": 5290, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:20:39.518380", "step": 5290, "epoch": 3 }, { "type": "pplx", "content": 6.157207483776002, "timestamp": "2025-09-30 22:20:39.520921", "step": 5290, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:39.556360", "step": 5290, "epoch": 3 }, { "type": "loss", "content": 0.0005087924073450267, "timestamp": "2025-09-30 22:20:39.569702", "step": 5291, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:39.610286", "step": 5291, "epoch": 3 }, { "type": "loss", "content": 0.00027282178052701056, "timestamp": "2025-09-30 22:20:39.644467", "step": 5292, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:39.700668", "step": 5292, "epoch": 3 }, { "type": "loss", "content": 0.0003268631116952747, "timestamp": "2025-09-30 22:20:39.710885", "step": 5293, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:39.761528", "step": 5293, "epoch": 3 }, { "type": "loss", "content": 0.010251539759337902, "timestamp": "2025-09-30 22:20:39.775270", "step": 5294, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:39.813329", "step": 5294, "epoch": 3 }, { "type": "loss", "content": 0.0006060664891265333, "timestamp": "2025-09-30 22:20:39.826665", "step": 5295, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:39.871660", "step": 5295, "epoch": 3 }, { "type": "loss", "content": 0.0006611746503040195, "timestamp": "2025-09-30 22:20:39.904833", "step": 5296, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:39.939104", "step": 5296, "epoch": 3 }, { "type": "loss", "content": 0.0018542595207691193, "timestamp": "2025-09-30 22:20:39.944450", "step": 5297, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:39.982181", "step": 5297, "epoch": 3 }, { "type": "loss", "content": 0.002848684089258313, "timestamp": "2025-09-30 22:20:39.993175", "step": 5298, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:40.027805", "step": 5298, "epoch": 3 }, { "type": "loss", "content": 0.0006789477774873376, "timestamp": "2025-09-30 22:20:40.040408", "step": 5299, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:20:40.093326", "step": 5299, "epoch": 3 }, { "type": "loss", "content": 0.0015816806117072701, "timestamp": "2025-09-30 22:20:40.130374", "step": 5300, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:40.175484", "step": 5300, "epoch": 3 }, { "type": "loss", "content": 0.0007194660720415413, "timestamp": "2025-09-30 22:20:40.188133", "step": 5301, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:20:40.245109", "step": 5301, "epoch": 3 }, { "type": "loss", "content": 0.0011649384396150708, "timestamp": "2025-09-30 22:20:40.260743", "step": 5302, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:40.328261", "step": 5302, "epoch": 3 }, { "type": "loss", "content": 0.0008334691519849002, "timestamp": "2025-09-30 22:20:40.340585", "step": 5303, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:40.376233", "step": 5303, "epoch": 3 }, { "type": "loss", "content": 0.0025359534192830324, "timestamp": "2025-09-30 22:20:40.409660", "step": 5304, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:40.444824", "step": 5304, "epoch": 3 }, { "type": "loss", "content": 0.002035485114902258, "timestamp": "2025-09-30 22:20:40.452800", "step": 5305, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:40.498002", "step": 5305, "epoch": 3 }, { "type": "loss", "content": 0.003279902972280979, "timestamp": "2025-09-30 22:20:40.510343", "step": 5306, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:40.560290", "step": 5306, "epoch": 3 }, { "type": "loss", "content": 0.0007503706146962941, "timestamp": "2025-09-30 22:20:40.573956", "step": 5307, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:20:40.618255", "step": 5307, "epoch": 3 }, { "type": "loss", "content": 0.0024530754890292883, "timestamp": "2025-09-30 22:20:40.643483", "step": 5308, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:20:40.688463", "step": 5308, "epoch": 3 }, { "type": "loss", "content": 0.0009420658461749554, "timestamp": "2025-09-30 22:20:40.703510", "step": 5309, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-30 22:20:40.741677", "step": 5309, "epoch": 3 }, { "type": "loss", "content": 6.078934529796243e-05, "timestamp": "2025-09-30 22:20:40.746119", "step": 5310, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:20:40.799835", "step": 5310, "epoch": 3 }, { "type": "loss", "content": 8.060686377575621e-05, "timestamp": "2025-09-30 22:20:40.816066", "step": 5311, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:40.864495", "step": 5311, "epoch": 3 }, { "type": "loss", "content": 0.0018246863037347794, "timestamp": "2025-09-30 22:20:40.892405", "step": 5312, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:40.952332", "step": 5312, "epoch": 3 }, { "type": "loss", "content": 0.0007188029121607542, "timestamp": "2025-09-30 22:20:40.960695", "step": 5313, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:40.995751", "step": 5313, "epoch": 3 }, { "type": "loss", "content": 0.0002277821331517771, "timestamp": "2025-09-30 22:20:41.006797", "step": 5314, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:41.052679", "step": 5314, "epoch": 3 }, { "type": "loss", "content": 0.0015718286158517003, "timestamp": "2025-09-30 22:20:41.063136", "step": 5315, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:41.097681", "step": 5315, "epoch": 3 }, { "type": "loss", "content": 0.002976819407194853, "timestamp": "2025-09-30 22:20:41.130855", "step": 5316, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:41.165424", "step": 5316, "epoch": 3 }, { "type": "loss", "content": 0.0011995661770924926, "timestamp": "2025-09-30 22:20:41.170286", "step": 5317, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:41.213789", "step": 5317, "epoch": 3 }, { "type": "loss", "content": 0.00014211825327947736, "timestamp": "2025-09-30 22:20:41.225606", "step": 5318, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:41.268612", "step": 5318, "epoch": 3 }, { "type": "loss", "content": 0.000843456422444433, "timestamp": "2025-09-30 22:20:41.279518", "step": 5319, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:41.326659", "step": 5319, "epoch": 3 }, { "type": "loss", "content": 0.0014417750062420964, "timestamp": "2025-09-30 22:20:41.355443", "step": 5320, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:20:41.390689", "step": 5320, "epoch": 3 }, { "type": "loss", "content": 0.0017638927092775702, "timestamp": "2025-09-30 22:20:41.394355", "step": 5321, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:41.429245", "step": 5321, "epoch": 3 }, { "type": "loss", "content": 0.00016600944218225777, "timestamp": "2025-09-30 22:20:41.441381", "step": 5322, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:20:41.496131", "step": 5322, "epoch": 3 }, { "type": "loss", "content": 0.000154578490764834, "timestamp": "2025-09-30 22:20:41.511959", "step": 5323, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:41.546816", "step": 5323, "epoch": 3 }, { "type": "loss", "content": 0.00029848323902115226, "timestamp": "2025-09-30 22:20:41.573935", "step": 5324, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:41.625919", "step": 5324, "epoch": 3 }, { "type": "loss", "content": 0.0010769865475594997, "timestamp": "2025-09-30 22:20:41.635992", "step": 5325, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:41.684496", "step": 5325, "epoch": 3 }, { "type": "loss", "content": 0.009188102558255196, "timestamp": "2025-09-30 22:20:41.696127", "step": 5326, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:41.738724", "step": 5326, "epoch": 3 }, { "type": "loss", "content": 0.0021131394896656275, "timestamp": "2025-09-30 22:20:41.749109", "step": 5327, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:41.791431", "step": 5327, "epoch": 3 }, { "type": "loss", "content": 0.000720141630154103, "timestamp": "2025-09-30 22:20:41.826112", "step": 5328, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:41.867175", "step": 5328, "epoch": 3 }, { "type": "loss", "content": 0.0013144101249054074, "timestamp": "2025-09-30 22:20:41.875934", "step": 5329, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:41.919620", "step": 5329, "epoch": 3 }, { "type": "loss", "content": 0.00027391567709855735, "timestamp": "2025-09-30 22:20:41.931995", "step": 5330, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:41.974162", "step": 5330, "epoch": 3 }, { "type": "loss", "content": 0.0004535318003036082, "timestamp": "2025-09-30 22:20:41.986678", "step": 5331, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:42.021345", "step": 5331, "epoch": 3 }, { "type": "loss", "content": 0.00015783542767167091, "timestamp": "2025-09-30 22:20:42.054488", "step": 5332, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:42.089363", "step": 5332, "epoch": 3 }, { "type": "loss", "content": 0.0016249046893790364, "timestamp": "2025-09-30 22:20:42.102400", "step": 5333, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:42.148800", "step": 5333, "epoch": 3 }, { "type": "loss", "content": 0.0012738303048536181, "timestamp": "2025-09-30 22:20:42.162267", "step": 5334, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:42.207882", "step": 5334, "epoch": 3 }, { "type": "loss", "content": 0.0007393105188384652, "timestamp": "2025-09-30 22:20:42.221302", "step": 5335, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:42.265253", "step": 5335, "epoch": 3 }, { "type": "loss", "content": 0.008159262128174305, "timestamp": "2025-09-30 22:20:42.298161", "step": 5336, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:42.346737", "step": 5336, "epoch": 3 }, { "type": "loss", "content": 0.004605071619153023, "timestamp": "2025-09-30 22:20:42.356313", "step": 5337, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:42.403054", "step": 5337, "epoch": 3 }, { "type": "loss", "content": 0.00010321156878490001, "timestamp": "2025-09-30 22:20:42.413202", "step": 5338, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:42.460354", "step": 5338, "epoch": 3 }, { "type": "loss", "content": 0.0001988293806789443, "timestamp": "2025-09-30 22:20:42.466492", "step": 5339, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:42.510940", "step": 5339, "epoch": 3 }, { "type": "loss", "content": 0.0008656299905851483, "timestamp": "2025-09-30 22:20:42.542279", "step": 5340, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:42.580546", "step": 5340, "epoch": 3 }, { "type": "loss", "content": 0.0016622475814074278, "timestamp": "2025-09-30 22:20:42.593619", "step": 5341, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:42.630049", "step": 5341, "epoch": 3 }, { "type": "loss", "content": 0.0063072070479393005, "timestamp": "2025-09-30 22:20:42.637372", "step": 5342, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:42.676457", "step": 5342, "epoch": 3 }, { "type": "loss", "content": 0.0005689572426490486, "timestamp": "2025-09-30 22:20:42.690183", "step": 5343, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:42.733094", "step": 5343, "epoch": 3 }, { "type": "loss", "content": 0.00042845390271395445, "timestamp": "2025-09-30 22:20:42.761998", "step": 5344, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:42.806381", "step": 5344, "epoch": 3 }, { "type": "loss", "content": 0.0014855386689305305, "timestamp": "2025-09-30 22:20:42.814561", "step": 5345, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:42.851778", "step": 5345, "epoch": 3 }, { "type": "loss", "content": 0.0006970709073357284, "timestamp": "2025-09-30 22:20:42.864320", "step": 5346, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:42.897836", "step": 5346, "epoch": 3 }, { "type": "loss", "content": 0.00020175872487016022, "timestamp": "2025-09-30 22:20:42.905442", "step": 5347, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:42.950083", "step": 5347, "epoch": 3 }, { "type": "loss", "content": 0.0008992621442303061, "timestamp": "2025-09-30 22:20:42.984291", "step": 5348, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:43.030136", "step": 5348, "epoch": 3 }, { "type": "loss", "content": 0.025950860232114792, "timestamp": "2025-09-30 22:20:43.038943", "step": 5349, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:43.083207", "step": 5349, "epoch": 3 }, { "type": "loss", "content": 0.0006523510091938078, "timestamp": "2025-09-30 22:20:43.091248", "step": 5350, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:43.125114", "step": 5350, "epoch": 3 }, { "type": "loss", "content": 0.0005884814891032875, "timestamp": "2025-09-30 22:20:43.135670", "step": 5351, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:43.169338", "step": 5351, "epoch": 3 }, { "type": "loss", "content": 0.023698223754763603, "timestamp": "2025-09-30 22:20:43.198211", "step": 5352, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:43.238318", "step": 5352, "epoch": 3 }, { "type": "loss", "content": 0.00043184056994505227, "timestamp": "2025-09-30 22:20:43.255173", "step": 5353, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:43.315358", "step": 5353, "epoch": 3 }, { "type": "loss", "content": 0.00028453642153181136, "timestamp": "2025-09-30 22:20:43.328792", "step": 5354, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:43.381360", "step": 5354, "epoch": 3 }, { "type": "loss", "content": 0.000272301520453766, "timestamp": "2025-09-30 22:20:43.401062", "step": 5355, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:20:43.464266", "step": 5355, "epoch": 3 }, { "type": "loss", "content": 0.012864899821579456, "timestamp": "2025-09-30 22:20:43.501279", "step": 5356, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:43.540332", "step": 5356, "epoch": 3 }, { "type": "loss", "content": 0.0010666355956345797, "timestamp": "2025-09-30 22:20:43.553018", "step": 5357, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:43.589037", "step": 5357, "epoch": 3 }, { "type": "loss", "content": 0.0013121003285050392, "timestamp": "2025-09-30 22:20:43.601595", "step": 5358, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:43.651520", "step": 5358, "epoch": 3 }, { "type": "loss", "content": 0.001673312857747078, "timestamp": "2025-09-30 22:20:43.662676", "step": 5359, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:43.714889", "step": 5359, "epoch": 3 }, { "type": "loss", "content": 0.010204694233834743, "timestamp": "2025-09-30 22:20:43.749511", "step": 5360, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:20:43.798275", "step": 5360, "epoch": 3 }, { "type": "loss", "content": 0.0004900519852526486, "timestamp": "2025-09-30 22:20:43.811648", "step": 5361, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:43.855331", "step": 5361, "epoch": 3 }, { "type": "loss", "content": 8.291222911793739e-05, "timestamp": "2025-09-30 22:20:43.863374", "step": 5362, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:43.898066", "step": 5362, "epoch": 3 }, { "type": "loss", "content": 0.00014946149894967675, "timestamp": "2025-09-30 22:20:43.910598", "step": 5363, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:43.947048", "step": 5363, "epoch": 3 }, { "type": "loss", "content": 6.812860374338925e-05, "timestamp": "2025-09-30 22:20:43.975881", "step": 5364, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-30 22:20:44.008283", "step": 5364, "epoch": 3 }, { "type": "loss", "content": 0.00020966037118341774, "timestamp": "2025-09-30 22:20:44.013164", "step": 5365, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:44.046423", "step": 5365, "epoch": 3 }, { "type": "loss", "content": 0.0008381285006180406, "timestamp": "2025-09-30 22:20:44.054499", "step": 5366, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:44.095836", "step": 5366, "epoch": 3 }, { "type": "loss", "content": 0.013554837554693222, "timestamp": "2025-09-30 22:20:44.106549", "step": 5367, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:44.149265", "step": 5367, "epoch": 3 }, { "type": "loss", "content": 0.003462092485278845, "timestamp": "2025-09-30 22:20:44.177796", "step": 5368, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:44.216957", "step": 5368, "epoch": 3 }, { "type": "loss", "content": 0.0015900923172011971, "timestamp": "2025-09-30 22:20:44.226879", "step": 5369, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-30 22:20:44.276180", "step": 5369, "epoch": 3 }, { "type": "loss", "content": 0.002574779326096177, "timestamp": "2025-09-30 22:20:44.293295", "step": 5370, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:20:44.337048", "step": 5370, "epoch": 3 }, { "type": "loss", "content": 0.0006000144057907164, "timestamp": "2025-09-30 22:20:44.353177", "step": 5371, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:44.389279", "step": 5371, "epoch": 3 }, { "type": "loss", "content": 0.015625132247805595, "timestamp": "2025-09-30 22:20:44.423501", "step": 5372, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:44.480047", "step": 5372, "epoch": 3 }, { "type": "loss", "content": 0.00029536019428633153, "timestamp": "2025-09-30 22:20:44.490243", "step": 5373, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:44.534152", "step": 5373, "epoch": 3 }, { "type": "loss", "content": 0.0016645942814648151, "timestamp": "2025-09-30 22:20:44.547502", "step": 5374, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:44.593183", "step": 5374, "epoch": 3 }, { "type": "loss", "content": 0.006586844101548195, "timestamp": "2025-09-30 22:20:44.606401", "step": 5375, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:44.645070", "step": 5375, "epoch": 3 }, { "type": "loss", "content": 0.010785430669784546, "timestamp": "2025-09-30 22:20:44.679334", "step": 5376, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:44.715488", "step": 5376, "epoch": 3 }, { "type": "loss", "content": 0.006151542533189058, "timestamp": "2025-09-30 22:20:44.728665", "step": 5377, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:44.762109", "step": 5377, "epoch": 3 }, { "type": "loss", "content": 0.004861661233007908, "timestamp": "2025-09-30 22:20:44.772780", "step": 5378, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:44.809751", "step": 5378, "epoch": 3 }, { "type": "loss", "content": 0.00281528034247458, "timestamp": "2025-09-30 22:20:44.823491", "step": 5379, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:44.856719", "step": 5379, "epoch": 3 }, { "type": "loss", "content": 0.0017934244824573398, "timestamp": "2025-09-30 22:20:44.889952", "step": 5380, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:44.934312", "step": 5380, "epoch": 3 }, { "type": "loss", "content": 0.0002498268731869757, "timestamp": "2025-09-30 22:20:44.947276", "step": 5381, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:44.981882", "step": 5381, "epoch": 3 }, { "type": "loss", "content": 0.0021900867577642202, "timestamp": "2025-09-30 22:20:44.994432", "step": 5382, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:45.027886", "step": 5382, "epoch": 3 }, { "type": "loss", "content": 0.0006635947502218187, "timestamp": "2025-09-30 22:20:45.035783", "step": 5383, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:45.073738", "step": 5383, "epoch": 3 }, { "type": "loss", "content": 0.0046465955674648285, "timestamp": "2025-09-30 22:20:45.107983", "step": 5384, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:20:45.151573", "step": 5384, "epoch": 3 }, { "type": "loss", "content": 0.0010856845183297992, "timestamp": "2025-09-30 22:20:45.166766", "step": 5385, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:45.211473", "step": 5385, "epoch": 3 }, { "type": "loss", "content": 0.0001665302988840267, "timestamp": "2025-09-30 22:20:45.223808", "step": 5386, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:45.263857", "step": 5386, "epoch": 3 }, { "type": "loss", "content": 0.0006953799165785313, "timestamp": "2025-09-30 22:20:45.276436", "step": 5387, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:45.321565", "step": 5387, "epoch": 3 }, { "type": "loss", "content": 0.0009348868625238538, "timestamp": "2025-09-30 22:20:45.354808", "step": 5388, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:45.394481", "step": 5388, "epoch": 3 }, { "type": "loss", "content": 0.0010101856896653771, "timestamp": "2025-09-30 22:20:45.403149", "step": 5389, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:45.466172", "step": 5389, "epoch": 3 }, { "type": "loss", "content": 0.004780636169016361, "timestamp": "2025-09-30 22:20:45.477376", "step": 5390, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:45.511881", "step": 5390, "epoch": 3 }, { "type": "loss", "content": 0.012082463130354881, "timestamp": "2025-09-30 22:20:45.522380", "step": 5391, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:45.557177", "step": 5391, "epoch": 3 }, { "type": "loss", "content": 0.0008092407369986176, "timestamp": "2025-09-30 22:20:45.588368", "step": 5392, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:45.631944", "step": 5392, "epoch": 3 }, { "type": "loss", "content": 0.0012615135638043284, "timestamp": "2025-09-30 22:20:45.640940", "step": 5393, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:45.685595", "step": 5393, "epoch": 3 }, { "type": "loss", "content": 0.002899823011830449, "timestamp": "2025-09-30 22:20:45.696704", "step": 5394, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:45.733505", "step": 5394, "epoch": 3 }, { "type": "loss", "content": 0.0009003611630760133, "timestamp": "2025-09-30 22:20:45.741384", "step": 5395, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:45.791089", "step": 5395, "epoch": 3 }, { "type": "loss", "content": 0.004248927813023329, "timestamp": "2025-09-30 22:20:45.825696", "step": 5396, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:45.864260", "step": 5396, "epoch": 3 }, { "type": "loss", "content": 0.0022605466656386852, "timestamp": "2025-09-30 22:20:45.877291", "step": 5397, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:45.924895", "step": 5397, "epoch": 3 }, { "type": "loss", "content": 0.0018691695295274258, "timestamp": "2025-09-30 22:20:45.937479", "step": 5398, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:45.982123", "step": 5398, "epoch": 3 }, { "type": "loss", "content": 0.004714638460427523, "timestamp": "2025-09-30 22:20:45.992644", "step": 5399, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:46.036767", "step": 5399, "epoch": 3 }, { "type": "loss", "content": 0.0013302817242220044, "timestamp": "2025-09-30 22:20:46.068947", "step": 5400, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:46.105525", "step": 5400, "epoch": 3 }, { "type": "loss", "content": 0.0004780096060130745, "timestamp": "2025-09-30 22:20:46.118593", "step": 5401, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:46.177681", "step": 5401, "epoch": 3 }, { "type": "loss", "content": 0.010950141586363316, "timestamp": "2025-09-30 22:20:46.191362", "step": 5402, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-30 22:20:46.262163", "step": 5402, "epoch": 3 }, { "type": "loss", "content": 0.0008549345657229424, "timestamp": "2025-09-30 22:20:46.278501", "step": 5403, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:46.322339", "step": 5403, "epoch": 3 }, { "type": "loss", "content": 0.0003098884189967066, "timestamp": "2025-09-30 22:20:46.354326", "step": 5404, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:46.394559", "step": 5404, "epoch": 3 }, { "type": "loss", "content": 0.00030104260076768696, "timestamp": "2025-09-30 22:20:46.399710", "step": 5405, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:20:49.232301", "step": 5405, "epoch": 3 }, { "type": "pplx", "content": 6.176700245754593, "timestamp": "2025-09-30 22:20:49.234471", "step": 5405, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:49.265286", "step": 5405, "epoch": 3 }, { "type": "loss", "content": 0.0015386255690827966, "timestamp": "2025-09-30 22:20:49.277795", "step": 5406, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:49.313932", "step": 5406, "epoch": 3 }, { "type": "loss", "content": 0.003452225122600794, "timestamp": "2025-09-30 22:20:49.324443", "step": 5407, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:49.357550", "step": 5407, "epoch": 3 }, { "type": "loss", "content": 0.006190893240272999, "timestamp": "2025-09-30 22:20:49.391004", "step": 5408, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:49.428190", "step": 5408, "epoch": 3 }, { "type": "loss", "content": 0.004402454011142254, "timestamp": "2025-09-30 22:20:49.436914", "step": 5409, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:49.486175", "step": 5409, "epoch": 3 }, { "type": "loss", "content": 0.001000831020064652, "timestamp": "2025-09-30 22:20:49.497265", "step": 5410, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:49.558018", "step": 5410, "epoch": 3 }, { "type": "loss", "content": 0.00031259850948117673, "timestamp": "2025-09-30 22:20:49.570589", "step": 5411, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:49.603567", "step": 5411, "epoch": 3 }, { "type": "loss", "content": 0.008803533390164375, "timestamp": "2025-09-30 22:20:49.636555", "step": 5412, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:49.673417", "step": 5412, "epoch": 3 }, { "type": "loss", "content": 0.0019300552085042, "timestamp": "2025-09-30 22:20:49.679146", "step": 5413, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:49.717014", "step": 5413, "epoch": 3 }, { "type": "loss", "content": 0.003091174876317382, "timestamp": "2025-09-30 22:20:49.730428", "step": 5414, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:49.772962", "step": 5414, "epoch": 3 }, { "type": "loss", "content": 0.00227998080663383, "timestamp": "2025-09-30 22:20:49.781100", "step": 5415, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:49.818516", "step": 5415, "epoch": 3 }, { "type": "loss", "content": 0.001922230003401637, "timestamp": "2025-09-30 22:20:49.853156", "step": 5416, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:49.886187", "step": 5416, "epoch": 3 }, { "type": "loss", "content": 0.004396355245262384, "timestamp": "2025-09-30 22:20:49.894315", "step": 5417, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:49.937412", "step": 5417, "epoch": 3 }, { "type": "loss", "content": 0.003643837058916688, "timestamp": "2025-09-30 22:20:49.944502", "step": 5418, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:49.989098", "step": 5418, "epoch": 3 }, { "type": "loss", "content": 0.0007700038840994239, "timestamp": "2025-09-30 22:20:50.000357", "step": 5419, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:50.036147", "step": 5419, "epoch": 3 }, { "type": "loss", "content": 0.00034244020935148, "timestamp": "2025-09-30 22:20:50.067650", "step": 5420, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:50.103007", "step": 5420, "epoch": 3 }, { "type": "loss", "content": 0.0015500112203881145, "timestamp": "2025-09-30 22:20:50.108636", "step": 5421, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:50.146260", "step": 5421, "epoch": 3 }, { "type": "loss", "content": 0.0006771894986741245, "timestamp": "2025-09-30 22:20:50.158607", "step": 5422, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:50.203742", "step": 5422, "epoch": 3 }, { "type": "loss", "content": 0.0002916664816439152, "timestamp": "2025-09-30 22:20:50.216104", "step": 5423, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:50.264018", "step": 5423, "epoch": 3 }, { "type": "loss", "content": 0.0021485788747668266, "timestamp": "2025-09-30 22:20:50.298540", "step": 5424, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:50.360698", "step": 5424, "epoch": 3 }, { "type": "loss", "content": 0.0031237241346389055, "timestamp": "2025-09-30 22:20:50.373798", "step": 5425, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:50.407195", "step": 5425, "epoch": 3 }, { "type": "loss", "content": 0.0006846352480351925, "timestamp": "2025-09-30 22:20:50.414223", "step": 5426, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:20:50.455165", "step": 5426, "epoch": 3 }, { "type": "loss", "content": 0.0031432094983756542, "timestamp": "2025-09-30 22:20:50.461250", "step": 5427, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-30 22:20:50.496108", "step": 5427, "epoch": 3 }, { "type": "loss", "content": 0.0017296245787292719, "timestamp": "2025-09-30 22:20:50.521285", "step": 5428, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:50.558571", "step": 5428, "epoch": 3 }, { "type": "loss", "content": 0.0013727423502132297, "timestamp": "2025-09-30 22:20:50.568701", "step": 5429, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:50.621634", "step": 5429, "epoch": 3 }, { "type": "loss", "content": 0.0017471632454544306, "timestamp": "2025-09-30 22:20:50.632104", "step": 5430, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:50.678338", "step": 5430, "epoch": 3 }, { "type": "loss", "content": 0.0008682655752636492, "timestamp": "2025-09-30 22:20:50.688655", "step": 5431, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:50.723624", "step": 5431, "epoch": 3 }, { "type": "loss", "content": 0.0007178595406003296, "timestamp": "2025-09-30 22:20:50.752109", "step": 5432, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:50.787262", "step": 5432, "epoch": 3 }, { "type": "loss", "content": 0.007646486163139343, "timestamp": "2025-09-30 22:20:50.793924", "step": 5433, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:50.847055", "step": 5433, "epoch": 3 }, { "type": "loss", "content": 0.0029912583995610476, "timestamp": "2025-09-30 22:20:50.855089", "step": 5434, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:50.906659", "step": 5434, "epoch": 3 }, { "type": "loss", "content": 0.00047764150076545775, "timestamp": "2025-09-30 22:20:50.919264", "step": 5435, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:50.962956", "step": 5435, "epoch": 3 }, { "type": "loss", "content": 0.00027994034462608397, "timestamp": "2025-09-30 22:20:50.996385", "step": 5436, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:51.043702", "step": 5436, "epoch": 3 }, { "type": "loss", "content": 0.011703962460160255, "timestamp": "2025-09-30 22:20:51.052395", "step": 5437, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:51.096411", "step": 5437, "epoch": 3 }, { "type": "loss", "content": 0.001737725455313921, "timestamp": "2025-09-30 22:20:51.109808", "step": 5438, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:51.148509", "step": 5438, "epoch": 3 }, { "type": "loss", "content": 0.0026286798529326916, "timestamp": "2025-09-30 22:20:51.156531", "step": 5439, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:51.190060", "step": 5439, "epoch": 3 }, { "type": "loss", "content": 0.003658075351268053, "timestamp": "2025-09-30 22:20:51.221526", "step": 5440, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:51.253910", "step": 5440, "epoch": 3 }, { "type": "loss", "content": 0.0006490391097031534, "timestamp": "2025-09-30 22:20:51.262842", "step": 5441, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:51.304060", "step": 5441, "epoch": 3 }, { "type": "loss", "content": 0.003733021439984441, "timestamp": "2025-09-30 22:20:51.311956", "step": 5442, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:51.350856", "step": 5442, "epoch": 3 }, { "type": "loss", "content": 0.001515704789198935, "timestamp": "2025-09-30 22:20:51.364233", "step": 5443, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:51.412638", "step": 5443, "epoch": 3 }, { "type": "loss", "content": 0.0006004043971188366, "timestamp": "2025-09-30 22:20:51.444682", "step": 5444, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:51.481210", "step": 5444, "epoch": 3 }, { "type": "loss", "content": 0.0005910450126975775, "timestamp": "2025-09-30 22:20:51.486960", "step": 5445, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:51.535640", "step": 5445, "epoch": 3 }, { "type": "loss", "content": 0.0017474086489528418, "timestamp": "2025-09-30 22:20:51.549021", "step": 5446, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-30 22:20:51.603456", "step": 5446, "epoch": 3 }, { "type": "loss", "content": 0.0037502232007682323, "timestamp": "2025-09-30 22:20:51.621070", "step": 5447, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:51.669938", "step": 5447, "epoch": 3 }, { "type": "loss", "content": 0.004871891345828772, "timestamp": "2025-09-30 22:20:51.704467", "step": 5448, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-30 22:20:51.761803", "step": 5448, "epoch": 3 }, { "type": "loss", "content": 0.003989064134657383, "timestamp": "2025-09-30 22:20:51.777015", "step": 5449, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:51.818006", "step": 5449, "epoch": 3 }, { "type": "loss", "content": 0.013066486455500126, "timestamp": "2025-09-30 22:20:51.830597", "step": 5450, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:51.870697", "step": 5450, "epoch": 3 }, { "type": "loss", "content": 0.002019469393417239, "timestamp": "2025-09-30 22:20:51.883213", "step": 5451, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:51.930014", "step": 5451, "epoch": 3 }, { "type": "loss", "content": 0.010137668810784817, "timestamp": "2025-09-30 22:20:51.964573", "step": 5452, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:51.998069", "step": 5452, "epoch": 3 }, { "type": "loss", "content": 0.0009108879021368921, "timestamp": "2025-09-30 22:20:52.008622", "step": 5453, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:52.061534", "step": 5453, "epoch": 3 }, { "type": "loss", "content": 0.0012465942418202758, "timestamp": "2025-09-30 22:20:52.073960", "step": 5454, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:52.133473", "step": 5454, "epoch": 3 }, { "type": "loss", "content": 0.004608785267919302, "timestamp": "2025-09-30 22:20:52.147199", "step": 5455, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:52.196103", "step": 5455, "epoch": 3 }, { "type": "loss", "content": 0.0007762848399579525, "timestamp": "2025-09-30 22:20:52.227939", "step": 5456, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:52.266593", "step": 5456, "epoch": 3 }, { "type": "loss", "content": 0.005198488011956215, "timestamp": "2025-09-30 22:20:52.276593", "step": 5457, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:52.323726", "step": 5457, "epoch": 3 }, { "type": "loss", "content": 0.0027350198943167925, "timestamp": "2025-09-30 22:20:52.337550", "step": 5458, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:52.379356", "step": 5458, "epoch": 3 }, { "type": "loss", "content": 0.0009995006257668138, "timestamp": "2025-09-30 22:20:52.391937", "step": 5459, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:52.427163", "step": 5459, "epoch": 3 }, { "type": "loss", "content": 0.0010448892135173082, "timestamp": "2025-09-30 22:20:52.458650", "step": 5460, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:52.499964", "step": 5460, "epoch": 3 }, { "type": "loss", "content": 0.0008325534290634096, "timestamp": "2025-09-30 22:20:52.516146", "step": 5461, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:52.553299", "step": 5461, "epoch": 3 }, { "type": "loss", "content": 0.002204886171966791, "timestamp": "2025-09-30 22:20:52.564692", "step": 5462, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:52.630410", "step": 5462, "epoch": 3 }, { "type": "loss", "content": 0.0005283255595713854, "timestamp": "2025-09-30 22:20:52.649602", "step": 5463, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:52.696554", "step": 5463, "epoch": 3 }, { "type": "loss", "content": 0.0007552019087597728, "timestamp": "2025-09-30 22:20:52.731094", "step": 5464, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:20:52.776441", "step": 5464, "epoch": 3 }, { "type": "loss", "content": 0.0012059420114383101, "timestamp": "2025-09-30 22:20:52.791890", "step": 5465, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:52.836147", "step": 5465, "epoch": 3 }, { "type": "loss", "content": 0.0018100308952853084, "timestamp": "2025-09-30 22:20:52.849918", "step": 5466, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:52.891998", "step": 5466, "epoch": 3 }, { "type": "loss", "content": 0.0005931039340794086, "timestamp": "2025-09-30 22:20:52.905922", "step": 5467, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:52.948160", "step": 5467, "epoch": 3 }, { "type": "loss", "content": 0.0002712627174332738, "timestamp": "2025-09-30 22:20:52.981582", "step": 5468, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:53.027342", "step": 5468, "epoch": 3 }, { "type": "loss", "content": 0.0028416018467396498, "timestamp": "2025-09-30 22:20:53.037242", "step": 5469, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:53.075209", "step": 5469, "epoch": 3 }, { "type": "loss", "content": 0.0011812286684289575, "timestamp": "2025-09-30 22:20:53.087800", "step": 5470, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:20:53.130085", "step": 5470, "epoch": 3 }, { "type": "loss", "content": 0.00046421645674854517, "timestamp": "2025-09-30 22:20:53.146022", "step": 5471, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:20:53.188102", "step": 5471, "epoch": 3 }, { "type": "loss", "content": 0.0011313254944980145, "timestamp": "2025-09-30 22:20:53.220023", "step": 5472, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:20:53.264112", "step": 5472, "epoch": 3 }, { "type": "loss", "content": 0.0023931600153446198, "timestamp": "2025-09-30 22:20:53.279806", "step": 5473, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:53.315078", "step": 5473, "epoch": 3 }, { "type": "loss", "content": 0.0008334001176990569, "timestamp": "2025-09-30 22:20:53.326422", "step": 5474, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:53.368283", "step": 5474, "epoch": 3 }, { "type": "loss", "content": 0.00035439120256341994, "timestamp": "2025-09-30 22:20:53.379364", "step": 5475, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:53.424988", "step": 5475, "epoch": 3 }, { "type": "loss", "content": 0.0028823537286370993, "timestamp": "2025-09-30 22:20:53.456516", "step": 5476, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-30 22:20:53.516608", "step": 5476, "epoch": 3 }, { "type": "loss", "content": 0.0009436103282496333, "timestamp": "2025-09-30 22:20:53.532318", "step": 5477, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:53.582510", "step": 5477, "epoch": 3 }, { "type": "loss", "content": 0.002098672790452838, "timestamp": "2025-09-30 22:20:53.601689", "step": 5478, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:20:53.666161", "step": 5478, "epoch": 3 }, { "type": "loss", "content": 0.004675887059420347, "timestamp": "2025-09-30 22:20:53.680075", "step": 5479, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-30 22:20:53.738357", "step": 5479, "epoch": 3 }, { "type": "loss", "content": 0.0021486529149115086, "timestamp": "2025-09-30 22:20:53.776605", "step": 5480, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:53.813850", "step": 5480, "epoch": 3 }, { "type": "loss", "content": 0.003002091310918331, "timestamp": "2025-09-30 22:20:53.822684", "step": 5481, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:53.860633", "step": 5481, "epoch": 3 }, { "type": "loss", "content": 0.00045192582183517516, "timestamp": "2025-09-30 22:20:53.872986", "step": 5482, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:53.921415", "step": 5482, "epoch": 3 }, { "type": "loss", "content": 0.0009709245641715825, "timestamp": "2025-09-30 22:20:53.935176", "step": 5483, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:20:53.984733", "step": 5483, "epoch": 3 }, { "type": "loss", "content": 0.0003787397581618279, "timestamp": "2025-09-30 22:20:54.027112", "step": 5484, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:54.065489", "step": 5484, "epoch": 3 }, { "type": "loss", "content": 0.003653504652902484, "timestamp": "2025-09-30 22:20:54.074463", "step": 5485, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:54.110825", "step": 5485, "epoch": 3 }, { "type": "loss", "content": 0.002409368986263871, "timestamp": "2025-09-30 22:20:54.124209", "step": 5486, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:20:54.162760", "step": 5486, "epoch": 3 }, { "type": "loss", "content": 0.005153408274054527, "timestamp": "2025-09-30 22:20:54.176478", "step": 5487, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:54.219308", "step": 5487, "epoch": 3 }, { "type": "loss", "content": 0.0031536994501948357, "timestamp": "2025-09-30 22:20:54.252572", "step": 5488, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:54.298481", "step": 5488, "epoch": 3 }, { "type": "loss", "content": 0.0007980418158695102, "timestamp": "2025-09-30 22:20:54.308636", "step": 5489, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:54.342262", "step": 5489, "epoch": 3 }, { "type": "loss", "content": 0.009608576074242592, "timestamp": "2025-09-30 22:20:54.353455", "step": 5490, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:20:54.392958", "step": 5490, "epoch": 3 }, { "type": "loss", "content": 0.007950414903461933, "timestamp": "2025-09-30 22:20:54.403635", "step": 5491, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:20:54.444519", "step": 5491, "epoch": 3 }, { "type": "loss", "content": 0.0034113333094865084, "timestamp": "2025-09-30 22:20:54.476463", "step": 5492, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-30 22:20:54.518991", "step": 5492, "epoch": 3 }, { "type": "loss", "content": 0.0006860059220343828, "timestamp": "2025-09-30 22:20:54.526994", "step": 5493, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:20:54.564196", "step": 5493, "epoch": 3 }, { "type": "loss", "content": 0.0003513667033985257, "timestamp": "2025-09-30 22:20:54.573032", "step": 5494, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:54.621320", "step": 5494, "epoch": 3 }, { "type": "loss", "content": 0.000984512735158205, "timestamp": "2025-09-30 22:20:54.634656", "step": 5495, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:54.692078", "step": 5495, "epoch": 3 }, { "type": "loss", "content": 0.004931764677166939, "timestamp": "2025-09-30 22:20:54.725509", "step": 5496, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:54.765897", "step": 5496, "epoch": 3 }, { "type": "loss", "content": 0.008381886407732964, "timestamp": "2025-09-30 22:20:54.776118", "step": 5497, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:20:54.813180", "step": 5497, "epoch": 3 }, { "type": "loss", "content": 0.0005858208751305938, "timestamp": "2025-09-30 22:20:54.825832", "step": 5498, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:20:54.867284", "step": 5498, "epoch": 3 }, { "type": "loss", "content": 0.0003775583754759282, "timestamp": "2025-09-30 22:20:54.880689", "step": 5499, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:20:54.929026", "step": 5499, "epoch": 3 }, { "type": "loss", "content": 0.001292352331802249, "timestamp": "2025-09-30 22:20:54.962242", "step": 5500, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 5500", "timestamp": "2025-09-30 22:21:00.015988", "step": 5500, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:21:00.069353", "step": 5500, "epoch": 3 }, { "type": "loss", "content": 0.0005804897518828511, "timestamp": "2025-09-30 22:21:00.082695", "step": 5501, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:21:00.135359", "step": 5501, "epoch": 3 }, { "type": "loss", "content": 0.00038161006523296237, "timestamp": "2025-09-30 22:21:00.147484", "step": 5502, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 592 ], "flops": 17560600598464 }, "timestamp": "2025-09-30 22:21:00.209709", "step": 5502, "epoch": 3 }, { "type": "loss", "content": 0.0006065990310162306, "timestamp": "2025-09-30 22:21:00.230754", "step": 5503, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:21:00.279084", "step": 5503, "epoch": 3 }, { "type": "loss", "content": 0.0013182084076106548, "timestamp": "2025-09-30 22:21:00.313432", "step": 5504, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-30 22:21:00.359227", "step": 5504, "epoch": 3 }, { "type": "loss", "content": 0.0002575173566583544, "timestamp": "2025-09-30 22:21:00.372387", "step": 5505, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:21:00.418829", "step": 5505, "epoch": 3 }, { "type": "loss", "content": 0.00010437212768010795, "timestamp": "2025-09-30 22:21:00.431142", "step": 5506, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:21:00.477294", "step": 5506, "epoch": 3 }, { "type": "loss", "content": 0.0005292315036058426, "timestamp": "2025-09-30 22:21:00.490689", "step": 5507, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:21:00.538793", "step": 5507, "epoch": 3 }, { "type": "loss", "content": 0.0005687709781341255, "timestamp": "2025-09-30 22:21:00.572192", "step": 5508, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:21:00.615810", "step": 5508, "epoch": 3 }, { "type": "loss", "content": 0.0007103682146407664, "timestamp": "2025-09-30 22:21:00.628486", "step": 5509, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:21:00.675749", "step": 5509, "epoch": 3 }, { "type": "loss", "content": 0.005789314396679401, "timestamp": "2025-09-30 22:21:00.686761", "step": 5510, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:21:00.721663", "step": 5510, "epoch": 3 }, { "type": "loss", "content": 0.0008344220696017146, "timestamp": "2025-09-30 22:21:00.738657", "step": 5511, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:21:00.799727", "step": 5511, "epoch": 3 }, { "type": "loss", "content": 0.0006502811447717249, "timestamp": "2025-09-30 22:21:00.834682", "step": 5512, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:21:00.888949", "step": 5512, "epoch": 3 }, { "type": "loss", "content": 0.00043549088877625763, "timestamp": "2025-09-30 22:21:00.901659", "step": 5513, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:21:00.954339", "step": 5513, "epoch": 3 }, { "type": "loss", "content": 0.00593127217143774, "timestamp": "2025-09-30 22:21:00.966974", "step": 5514, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:21:01.014981", "step": 5514, "epoch": 3 }, { "type": "loss", "content": 0.006700599100440741, "timestamp": "2025-09-30 22:21:01.028727", "step": 5515, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:21:01.068924", "step": 5515, "epoch": 3 }, { "type": "loss", "content": 0.0027859918773174286, "timestamp": "2025-09-30 22:21:01.105943", "step": 5516, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:21:01.146730", "step": 5516, "epoch": 3 }, { "type": "loss", "content": 0.003578424919396639, "timestamp": "2025-09-30 22:21:01.156527", "step": 5517, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-30 22:21:01.203669", "step": 5517, "epoch": 3 }, { "type": "loss", "content": 0.0036419949028640985, "timestamp": "2025-09-30 22:21:01.214874", "step": 5518, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-30 22:21:01.258877", "step": 5518, "epoch": 3 }, { "type": "loss", "content": 0.019855622202157974, "timestamp": "2025-09-30 22:21:01.278038", "step": 5519, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-30 22:21:01.319944", "step": 5519, "epoch": 3 }, { "type": "loss", "content": 0.00021206910605542362, "timestamp": "2025-09-30 22:21:01.354786", "step": 5520, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:21:04.043012", "step": 5520, "epoch": 3 }, { "type": "pplx", "content": 6.126922369940893, "timestamp": "2025-09-30 22:21:04.047684", "step": 5520, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:21:04.088921", "step": 5520, "epoch": 3 }, { "type": "loss", "content": 0.0011069091269746423, "timestamp": "2025-09-30 22:21:04.101936", "step": 5521, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-30 22:21:04.157703", "step": 5521, "epoch": 3 }, { "type": "loss", "content": 0.0017075618961825967, "timestamp": "2025-09-30 22:21:04.175482", "step": 5522, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-30 22:21:04.229684", "step": 5522, "epoch": 3 }, { "type": "loss", "content": 0.004409335553646088, "timestamp": "2025-09-30 22:21:04.245572", "step": 5523, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:21:04.283940", "step": 5523, "epoch": 3 }, { "type": "loss", "content": 0.00010646448208717629, "timestamp": "2025-09-30 22:21:04.326210", "step": 5524, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-30 22:21:04.370348", "step": 5524, "epoch": 3 }, { "type": "loss", "content": 0.00014393814490176737, "timestamp": "2025-09-30 22:21:04.380405", "step": 5525, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:21:04.421144", "step": 5525, "epoch": 3 }, { "type": "loss", "content": 0.0021583314519375563, "timestamp": "2025-09-30 22:21:04.434490", "step": 5526, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:21:04.492165", "step": 5526, "epoch": 3 }, { "type": "loss", "content": 0.0008602791931480169, "timestamp": "2025-09-30 22:21:04.505496", "step": 5527, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-30 22:21:04.561474", "step": 5527, "epoch": 3 }, { "type": "loss", "content": 0.0017894088523462415, "timestamp": "2025-09-30 22:21:04.596053", "step": 5528, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-30 22:21:04.644908", "step": 5528, "epoch": 3 }, { "type": "loss", "content": 0.003701946698129177, "timestamp": "2025-09-30 22:21:04.657583", "step": 5529, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-30 22:21:04.703046", "step": 5529, "epoch": 3 }, { "type": "loss", "content": 0.0004692415823228657, "timestamp": "2025-09-30 22:21:04.715627", "step": 5530, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-30 22:21:04.757924", "step": 5530, "epoch": 3 }, { "type": "loss", "content": 0.007988791912794113, "timestamp": "2025-09-30 22:21:04.774942", "step": 5531, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-30 22:21:04.811155", "step": 5531, "epoch": 3 }, { "type": "loss", "content": 0.010778653435409069, "timestamp": "2025-09-30 22:21:04.839679", "step": 5532, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 3, 240 ], "batch_size": 8, "flops": 4746011128704 } ], "timestamp": "2025-09-30 22:21:07.479130", "step": 5532, "epoch": 3 }, { "type": "pplx", "content": 6.123145012094139, "timestamp": "2025-09-30 22:21:07.498095", "step": 5532, "epoch": 3 }, { "type": "best_pplx", "content": 5.286725956468357, "timestamp": "2025-09-30 22:21:07.515096", "step": 5532, "epoch": 3 }, { "type": "best_step", "content": 1380, "timestamp": "2025-09-30 22:21:07.530192", "step": 5532, "epoch": 3 }, { "type": "total_pplx_flops", "content": 24378677094380800, "timestamp": "2025-09-30 22:21:07.545013", "step": 5532, "epoch": 3 }, { "type": "total_train_flops", "content": 49118667663965760, "timestamp": "2025-09-30 22:21:07.564275", "step": 5532, "epoch": 3 } ], "best_evals": { "pplx": { "score": 5.286725956468357, "step": 1380 }, "rougel": { "precision": 0.5759702728404714, "recall": 0.5677652955140957, "fmeasure": 0.5673456003100639 } } }